diff --git a/.github/ISSUE_TEMPLATE/85_bug-report.md b/.github/ISSUE_TEMPLATE/85_bug-report.md index 08d03c284ca..fde5917a8a7 100644 --- a/.github/ISSUE_TEMPLATE/85_bug-report.md +++ b/.github/ISSUE_TEMPLATE/85_bug-report.md @@ -21,8 +21,7 @@ assignees: '' **Enable crash reporting** -> If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`: - +> Change "enabled" to true in "send_crash_reports" section in `config.xml`: ``` diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index d69168b01ee..a9503136b1a 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -76,6 +76,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index a0d0e49b95b..a6be21fb14a 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -73,6 +73,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index e5b797beebd..0a21fb93beb 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -60,6 +60,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: @@ -892,6 +893,48 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### 
############################################################################################ @@ -975,6 +1018,7 @@ jobs: - BuilderBinFreeBSD - BuilderBinPPC64 - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -3643,7 +3687,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 9de0444bd83..8162dc37223 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -53,6 +53,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index dd834959578..3845ebdcac7 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -94,6 +94,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: @@ -952,6 +953,47 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1034,6 +1076,7 @@ jobs: - BuilderBinFreeBSD - BuilderBinPPC64 - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -4541,7 +4584,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + 
CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports @@ -5182,3 +5225,39 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" +############################################################################################## +##################################### SQL TEST ############################################### +############################################################################################## + SQLTest: + needs: [BuilderDebRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqltest + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLTest + REPO_COPY=${{runner.temp}}/sqltest/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: SQLTest + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqltest.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 21284815583..6d999d1bee7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -52,6 +52,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: diff --git a/.gitmodules b/.gitmodules index ba71a8ae3a7..caed96b0320 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/facebook/zstd [submodule "contrib/lz4"] path = contrib/lz4 - url = https://github.com/lz4/lz4 + url = https://github.com/ClickHouse/lz4 [submodule "contrib/librdkafka"] path = contrib/librdkafka url = https://github.com/ClickHouse/librdkafka @@ -13,7 +13,6 @@ [submodule "contrib/zlib-ng"] path = contrib/zlib-ng url = https://github.com/ClickHouse/zlib-ng - branch = clickhouse-2.0.x [submodule "contrib/googletest"] path = contrib/googletest url = https://github.com/google/googletest @@ -41,13 +40,9 @@ [submodule "contrib/boost"] path = contrib/boost url = https://github.com/ClickHouse/boost -[submodule "contrib/base64"] - path = contrib/base64 - url = https://github.com/ClickHouse/Turbo-Base64 [submodule "contrib/arrow"] path = contrib/arrow url = https://github.com/ClickHouse/arrow - branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift @@ -93,7 +88,6 @@ [submodule "contrib/grpc"] path = contrib/grpc url = https://github.com/ClickHouse/grpc - branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws url = https://github.com/ClickHouse/aws-sdk-cpp @@ -140,11 +134,9 @@ [submodule "contrib/cassandra"] path = contrib/cassandra url = https://github.com/ClickHouse/cpp-driver - branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv url = https://github.com/ClickHouse/libuv - branch = clickhouse [submodule "contrib/fmtlib"] path = 
contrib/fmtlib url = https://github.com/fmtlib/fmt @@ -157,11 +149,9 @@ [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl url = https://github.com/ClickHouse/cyrus-sasl - branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring url = https://github.com/RoaringBitmap/CRoaring - branch = v0.2.66 [submodule "contrib/miniselect"] path = contrib/miniselect url = https://github.com/danlark1/miniselect @@ -174,7 +164,6 @@ [submodule "contrib/abseil-cpp"] path = contrib/abseil-cpp url = https://github.com/abseil/abseil-cpp - branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox @@ -187,7 +176,6 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse/boringssl - branch = unknown_branch_from_artur [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse/NuRaft @@ -248,7 +236,6 @@ [submodule "contrib/annoy"] path = contrib/annoy url = https://github.com/ClickHouse/annoy - branch = ClickHouse-master [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl @@ -282,7 +269,6 @@ [submodule "contrib/openssl"] path = contrib/openssl url = https://github.com/openssl/openssl - branch = openssl-3.0 [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = https://github.com/google/benchmark @@ -331,6 +317,10 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/libarchive"] + path = contrib/libarchive + url = https://github.com/libarchive/libarchive.git + ignore = dirty [submodule "contrib/libfiu"] path = contrib/libfiu url = https://github.com/ClickHouse/libfiu.git @@ -340,3 +330,21 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/c-ares/c-ares.git +[submodule "contrib/incbin"] + path = contrib/incbin + url = https://github.com/graphitemaster/incbin.git +[submodule "contrib/usearch"] + path = contrib/usearch + url = https://github.com/unum-cloud/usearch.git +[submodule "contrib/SimSIMD"] + path = contrib/SimSIMD + url = https://github.com/ashvardanian/SimSIMD.git +[submodule "contrib/FP16"] + path = contrib/FP16 + url = https://github.com/Maratyszcza/FP16.git +[submodule "contrib/robin-map"] + path = contrib/robin-map + url = https://github.com/Tessil/robin-map.git +[submodule "contrib/aklomp-base64"] + path = contrib/aklomp-base64 + url = https://github.com/aklomp/base64.git diff --git a/CHANGELOG.md b/CHANGELOG.md index bf6b309ef2c..24a03e283e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ ### Table of Contents +**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**
+**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
**[ClickHouse release v23.4, 2023-04-26](#234)**
@@ -9,6 +11,401 @@ # 2023 Changelog +### ClickHouse release 23.8 LTS, 2023-08-31 + +#### Backward Incompatible Change +* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in disk function arguments. In previous versions it could be specified as `disk = disk_<disk_name>(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` more than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable support for 3DES in TLS connections. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)). + +#### New Feature +* Direct import from zip/7z/tar archives. Example: `file('*.zip :: *.csv')` (see the short sketch below). [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)). +* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build a flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without a table name, as requested in issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)). +* A system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)). +* Added `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
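
As an illustration of the archive import feature above, a minimal sketch (the archive name, member glob, and format are hypothetical):

```sql
-- Query CSV members directly inside an archive; ' :: ' separates the archive path
-- from the path of the files within it, mirroring the file('*.zip :: *.csv') form above.
SELECT count()
FROM file('backup.zip :: *.csv', 'CSVWithNames');
```

The same separator applies to 7z and tar archives, e.g. `file('logs.tar :: access.log')`.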
+* New functions `toUTCTimestamp/fromUTCTimestamp` to act the same as Spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without external format schema using autogenerated schema from table structure (controlled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export autogenerated schema while input/output using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)). +* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)). +* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)). +* Allow variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow to execute constant non-deterministic functions in mutations on initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)). +* Add input format `One` that doesn't read any data and always returns a single row with column `dummy` with type `UInt8` and value `0`, like `system.one`. It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)). +* Support `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `max_threads_for_indexes` setting to limit the number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)). +* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755), [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)) Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight`. [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)).
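
To make the new array and tuple helpers above concrete, a small sketch (values are illustrative; the results in the comments follow the documented semantics):

```sql
-- Rotation is cyclic; shifting drops elements and pads with the type's default value.
SELECT
    arrayRotateLeft([1, 2, 3, 4], 1) AS rotated,  -- [2, 3, 4, 1]
    arrayShiftLeft([1, 2, 3, 4], 1)  AS shifted,  -- [2, 3, 4, 0]
    tupleConcat((1, 'a'), (2, 'b'))  AS merged;   -- (1, 'a', 2, 'b')
```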
+* Add column `name` to `system.clusters` as an alias to cluster. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)). +* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added support for adding and subtracting arrays: `[5,2] + [1,7]`. Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)). + +#### Experimental Feature +* Add new table engine `S3Queue` for streaming data import from s3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)). It is not ready to use. Do not use it. +* Enable parallel reading from replicas over distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)). +* Add experimental support for HNSW as approximate neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). This is currently intended for those who continue working on the implementation. Do not use it. + +#### Performance Improvement +* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)). +* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)). +* Use filter by file/path before reading in `url`/`file`/`hdfs` table functins. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). +* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). 
[#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)). +* Speed up merging of states of `uniq` and `uniqExact` aggregate functions by parallelizing conversion before merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize aggregation performance of nullable string key when using a large number of variable length keys. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)). +* Add a pass in Analyzer for time filter optimization with preimage. The performance experiments of SSB on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of 8.5% to the geomean QPS when the experimental analyzer is enabled. [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Optimize the merge if all hash sets are single-level in the `uniqExact` (COUNT DISTINCT) function. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)). +* `Join` table engine: do not clone hash join data structure with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)). +* Implement native `ORC` input format without the "apache arrow" library to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)). +* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better utilization of thread pool for BACKUPs and RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)). +* This optimizes the interaction with ClickHouse Keeper. Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times which didn't make much sense. In order to avoid this the caller could have some logic to not add the same watch multiple times. With this change this deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). +* Cache number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)). 
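
A sketch of the row-count cache mentioned above in action (the path is hypothetical; the setting is enabled by default and is spelled out here only for clarity):

```sql
-- Repeated counts over the same files can reuse the cached number of rows
-- instead of re-reading the data.
SELECT count()
FROM file('exports/*.parquet')
SETTINGS use_cache_for_count_from_files = 1;
```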
+* More careful thread management will improve the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)). + +#### Improvement +* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when external command stderr has data. This helps make debugging external command easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)). +* Add `partition` column to the `system part_log` and merge table. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)). +* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime (without server restart). [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). +* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)). +* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added suggestions for mistyped names for databases and tables. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* While read small files from HDFS by Gluten, we found that it will cost more times when compare to directly query by Spark. And we did something with that. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)). +* There were too many worthless error logs after session expiration, which we didn't like. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)). +* Introduce fallback ZooKeeper sessions which are time-bound. Fixed `index` column in system.zookeeper_connection for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Add ability to log when max_partitions_per_insert_block is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). +* Added a bunch of custom commands to clickhouse-keeper-client (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)). +* Updated check for connection string in `azureBlobStorage` table function as connection string with "sas" does not always begin with the default endpoint and updated connection URL to include "sas" token after adding Azure's container to URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix description for filtering sets in the `full_sorting_merge` JOIN algorithm. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). +* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). +* Add `SYSTEM SYNC FILESYSTEM CACHE` command. 
It will compare in-memory state of filesystem cache with what it has on disk and fix in-memory state if needed. This is only needed if you are making manual interventions in on-disk data, which is highly discouraged. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Attempt to create a generic proxy resolver for CH while keeping backwards compatibility with existing S3 storage conf proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)). +* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). +* Function `arrayIntersect` now returns the values in the order, corresponding to the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add new queries, which allow to create/drop of access entities in specified access storage or move access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). +* Make `ALTER TABLE FREEZE` queries not replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). +* Added possibility to flush system tables on unexpected shutdown. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix the case when `s3` table function refused to work with pre-signed URLs. close [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). +* Add column `name` as an alias to `event` and `metric` in the `system.events` and `system.metrics` tables. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). +* Added support of syntax `CREATE UNIQUE INDEX` in parser as a no-op for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique = 1` to ignore UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). +* Add support of predefined macro (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). +* Disable updating the filesystem cache during backup/restore. Filesystem cache must not be updated during backup/restore, it seems it just slows down the process without any profit (because the BACKUP command can read a lot of data and it's no use to put all the data to the filesystem cache and immediately evict it). [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). +* The configuration of S3 endpoint allow using it from the root, and append '/' automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)). 
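
Tying back to the `name` alias added to `system.events` and `system.metrics` a few entries above, a quick sketch (the filter is arbitrary):

```sql
-- `name` is an alias for the existing `event` / `metric` columns, so either spelling works.
SELECT name, value
FROM system.events
WHERE name LIKE 'Query%'
ORDER BY value DESC
LIMIT 5;
```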
+* For clickhouse-local allow positional options and populate global UDF settings (user_scripts_path and user_defined_executable_functions_config). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* `system.asynchronous_metrics` now includes metrics "QueryCacheEntries" and "QueryCacheBytes" to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)). +* Added possibility to use `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)). +* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)). +* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently we have database and table names case-sensitive, but BI tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have `information_schema` database, containing lowercase tables, such as `information_schema.tables` and `INFORMATION_SCHEMA` database, containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools are querying `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in lowercase and uppercase `information_schema` database. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Query`CHECK TABLE` has better performance and usability (sends progress updates, cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)). +* Add support for `modulo`, `intDiv`, `intDivOrZero` for tuples by distributing them across tuple's elements. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* When merging into non-'clickhouse' rooted configuration, configs with different root node name just bypassed without exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)). +* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)). +* Use the same default paths for `clickhouse-keeper` (symlink) as for `clickhouse-keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve error message for table function `remote`. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). 
[#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). +* Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). +* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). +* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)). +* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)). +* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)). +* Add diagnostic info about file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)). +* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Limit number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)). +* Added a setting `allow_moving_table_directory_to_trash` that allows to ignore `Directory for table data already exists` error when replicating/recovering a `Replicated` database. 
[#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)). +* If server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` are misconfigured to 0, it will now fail gracefully instead of terminating the application. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). +* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). +* Add ability to turn off flush of Distributed tables on `DETACH`, `DROP`, or server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). +* The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). +* Use longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). +* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit array size for `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `SCHEMA()` was added as alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). +* Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* As expert-level settings, it is now possible to (1) configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). +* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Retry loading parts in case of network errors while interaction with Microsoft Azure. [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Stacktrace for exceptions, Materailized view exceptions are propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)). +* If no hostname or port were specified, keeper client will try to search for a connection string in the ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)). +* Add profile event `PartsLockMicroseconds` which shows the amount of microseconds we hold the data parts lock in MergeTree table engine family. 
[#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)). +* Make reconnect limit in RAFT limits configurable for keeper. This configuration can help to make keeper to rebuild connection with peers quicker if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)). +* Ignore foreign keys in tables definition to improve compatibility with MySQL, so a user wouldn't need to rewrite his SQL of the foreign key part, ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)). + +#### Build/Testing/Packaging Improvement +* Don't expose symbols from ClickHouse binary to dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add https://github.com/elliotchance/sqltest to CI to report the SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)). +* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `arrayAUC` was using heavy C++ templates - ditched them. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some translation units were always rebuilt regardless of ccache. The culprit is found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in stress tests. 
[#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper ENV parameters passing to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)). +* Upgrade snappy to 1.1.10. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)). +* Slightly improve cmake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Do not reset (experimental) Annoy index during build-up with more than one mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). +* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 data types as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* A fix for checksum of compress marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix mistakenly comma parsing as part of datetime in CSV best effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't throw exception when executable UDF has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). +* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). +* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). +* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). 
+* Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix named collections on cluster [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). +* Fix possible assert in `~PushingAsyncPipelineExecutor` in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). +* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Convert sparse column format to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). +* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "Not-ready Set" exception [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). +* Experimental session_log table: Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Forbid use_structure_from_insertion_table_in_table_functions when execute Scalar [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `Context has expired` error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix `timeout_overflow_mode` when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). +* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Fix JSON_QUERY Function parse error while path is all number [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). +* Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). +* Fix crash in join on sparse columna [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). +* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix: the INTERPOLATE expression took the source column instead of the same-named alias from the SELECT expression list. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix the number of dropped granules in `EXPLAIN PLAN index = 1` [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix the prepared set cache getting stuck in the mutation pipeline [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a bug in mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). +* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). +* Try to fix a bug with `NULL::LowCardinality(Nullable(...)) NOT IN` [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix: sorted DISTINCT with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* `transform`: correctly handle a default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix a fuzzer crash in parseDateTime [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). +* MaterializedPostgreSQL: fix an uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix a possible segfault while using the PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix the `named_collection_admin` alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). + +### ClickHouse release 23.7, 2023-07-27 + +#### Backward Incompatible Change +* Add the `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant it, use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`; to be able to give these grants, `named_collection_admin` is required in the config (previously it was named `named_collection_control`, which will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump the default value of `distributed_ddl_entry_format_version` to 5 (enables OpenTelemetry and initial_query_id pass-through).
This will prevent processing of existing distributed DDL entries after a *downgrade* (but note that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting if there is a table with an invalid projection. An example is a projection that created positional columns in the PK (e.g. `projection p (select * order by 1, 4)`, which is not allowed in a table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The experimental feature `hashid` is removed due to a bug. The quality of the implementation was questionable from the start, and it never made it out of experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added the `Overlay` database engine to combine multiple databases into one. Added the `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows read-only interaction with S3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows interacting with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* The Kafka connector can fetch the Avro schema from the schema registry with basic authentication using URL-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). +* Add the function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). +* Implement support for encrypted elements in the configuration file. It is now possible to use encrypted text in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). +* The Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). +* Add the `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972).
[#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add the `input_format_csv_allow_variable_number_of_columns` option. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). +* Added a system table `jemalloc_bins` to show stats for jemalloc bins. Example: `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). +* Add the `RowBinaryWithDefaults` format with an extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added the `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* `CREATE TABLE` now supports `PRIMARY KEY` syntax in the column definition. Columns are added to the primary index in the same order they are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added a Peak Memory Usage statistic to HTTP headers. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as an alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for the `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add the aggregate function `array_concat_agg` for compatibility with BigQuery; it is an alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`.
Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added the `firstLine` function to extract the first line from a multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). +* Added the query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush the queue of asynchronous inserts on graceful shutdown. The setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Added the alias `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Add aliases for the functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Support `async_deduplication_token` for async inserts. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Add a new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI for the URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Enable automatic selection of the sparse serialization format by default. It improves performance. The format has been supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable the `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). +* Improve performance of some queries by tuning the allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool`, as in `MergeTreeReadPool`. Also, from now on we use a connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of JOIN. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve the grace_hash join by reserving the hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)).
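For illustration, the sparse-serialization change above can be reverted per table with a query along these lines (a minimal sketch; `my_table` is a hypothetical MergeTree table, and the setting name is taken from the entry above):

```sql
-- Keep the full (non-sparse) column format for this table,
-- which also preserves compatibility with pre-22.1 versions for its parts.
ALTER TABLE my_table MODIFY SETTING ratio_of_defaults_for_sparse_serialization = 1;
```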
+* Waiting on a lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Move conditions with primary key columns to the end of the PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It reduces memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). +* Allow a strict lower boundary for the file segment size by downloading the remaining data in the background. The minimum size of a file segment (if the actual file size is bigger) is configured by the cache configuration setting `boundary_alignment` (`4Mi` by default). The number of background threads is configured by the cache configuration setting `background_download_threads` (`2` by default). Also, `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP requests from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* A new setting `merge_tree_determine_task_size_by_prewhere_columns` was added. If set to `true`, only the sizes of the columns from the `PREWHERE` section will be considered when determining the reading task size. Otherwise, all the columns from the query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Use read_bytes/total_bytes_to_read for the progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specifies the amount of time a replica will wait before closing the interserver handler for replicated sends.
Also fix an inconsistency between the shutdown of tables and interserver handlers: now the server shuts down tables first and only after that shuts down the interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for the generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues in the `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Functions `date_diff` and `age` now support millisecond/microsecond units and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Improve parsing of paths in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). +* Add the ability to disable native copy for S3 (the `allow_s3_native_copy` setting for BACKUP/RESTORE, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add a column `primary_key_size` to the `system.parts` table to show the compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without an existing home directory, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the placeholder `%a` for the full filename in the `rename_files_after_processing` setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add the column `modification_time` to `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)).
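As a quick illustration of the two new system-table columns mentioned just above (a sketch only; `my_table` is a hypothetical table name):

```sql
-- Compressed primary key size per active part of a table.
SELECT name, primary_key_size
FROM system.parts
WHERE database = currentDatabase() AND table = 'my_table' AND active;

-- Per-part column modification time.
SELECT name, column, modification_time
FROM system.parts_columns
WHERE database = currentDatabase() AND table = 'my_table' AND active;
```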
+* Add a new setting `input_format_csv_use_default_on_bad_values` to the CSV format that allows inserting the default value when parsing of a single field fails. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added flushing of the crash log to disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior on the dashboard page where errors unrelated to authentication were not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for Nullable arguments in the function `range`. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert conditions like `toYear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix that `use_structure_from_insertion_table_in_table_functions` did not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* The cache dictionary now requests only unique keys from the source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed the case when settings were not applied for an EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in a DESCRIBE TABLE query for compatibility with the SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to run SYSTEM RELOAD CONFIG manually. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added the `allow_create_index_without_type` setting that allows ignoring `ADD INDEX` queries without a specified `TYPE`. Standard SQL queries will just succeed without changing the table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Log messages are written to `system.text_log` starting from server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown.
Now session creation tries all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* The Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add the setting `optimize_use_implicit_projections` to disable implicit projections (currently only the `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` to cause an infinite loop. This possibility is now removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on the ClickHouse side. This closes [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183). This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). +* Wait for the zero-copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now the interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). + +#### Experimental Feature +* Writing Parquet files is 10x faster; it is multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder`, which is disabled by default because the feature is non-ideal. +* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Allow specifying a disk name for custom disks. Previously, custom disks would use an internally generated disk name. Now it is possible with `disk = disk_(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)).
This syntax may still be changed. +* (experimental MaterializedMySQL) Fixed a crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Introduced automatic conversion of text types to UTF-8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now double-quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Upgrade Intel QPL from v1.1.0 to v1.2.0, upgrade Intel accel-config from v3.5 to v4.0, and fix an issue where a Device IOTLB miss had a big performance impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support the ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration, which can be enabled via the `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete. + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add an integration test check with the Analyzer enabled. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` work with nullable arrays, which are not possible in ClickHouse but are needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
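The `system.licenses` entry above can be checked with a query like the following (illustrative only; it assumes the standard `library_name` and `license_type` columns of `system.licenses`):

```sql
-- The hard-forked Poco library should now appear among the bundled licenses.
SELECT library_name, license_type
FROM system.licenses
WHERE library_name ILIKE '%poco%';
```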
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). +* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). +* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). +* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). +* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). 
+* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release 23.6, 2023-06-29 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 45c3c422d7a..65ff9dc5384 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,7 +118,11 @@ endif() # - sanitize.cmake add_library(global-libs INTERFACE) -include (cmake/fuzzer.cmake) +# We don't want to instrument everything with fuzzer, but only specific targets (see below), +# also, since we build our own llvm, we specifically don't want to instrument +# libFuzzer library itself - it would result in infinite recursion +#include (cmake/fuzzer.cmake) + include (cmake/sanitize.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) @@ -165,8 +169,14 @@ elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") endif () -# Make sure the final executable has symbols exported -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") +if (OS_LINUX) + # We should not export dynamic symbols, because: + # - The main clickhouse binary does not use dlopen, + # and whatever is poisoning it by LD_PRELOAD should not link to our symbols. + # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries + # should not expose their symbols to ODBC drivers and libraries. 
+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") +endif () if (OS_DARWIN) # The `-all_load` flag forces loading of all symbols from all libraries, @@ -202,9 +212,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS "Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)" ${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT}) -if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - set(USE_DEBUG_HELPERS ON) -endif() option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) @@ -555,6 +562,46 @@ add_subdirectory (programs) add_subdirectory (tests) add_subdirectory (utils) +# Function get_all_targets collects all targets recursively +function(get_all_targets var) + macro(get_all_targets_recursive targets dir) + get_property(subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach(subdir ${subdirectories}) + get_all_targets_recursive(${targets} ${subdir}) + endforeach() + get_property(current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list(APPEND ${targets} ${current_targets}) + endmacro() + + set(targets) + get_all_targets_recursive(targets ${CMAKE_CURRENT_SOURCE_DIR}) + set(${var} ${targets} PARENT_SCOPE) +endfunction() + +if (FUZZER) + # Bundle fuzzers target + add_custom_target(fuzzers) + # Instrument all targets fuzzer and link with libfuzzer + get_all_targets(all_targets) + foreach(target ${all_targets}) + if (NOT(target STREQUAL "_fuzzer" OR target STREQUAL "_fuzzer_no_main")) + get_target_property(target_type ${target} TYPE) + if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY")) + target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") + endif() + # clickhouse fuzzer isn't working correctly + # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526 + #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse") + if (target MATCHES ".+_fuzzer") + message(STATUS "${target} instrumented with fuzzer") + target_link_libraries(${target} PUBLIC ch_contrib::fuzzer) + # Add to fuzzers bundle + add_dependencies(fuzzers ${target}) + endif() + endif() + endforeach() +endif() + include (cmake/sanitize_targets.cmake) # Build native targets if necessary diff --git a/PreLoad.cmake b/PreLoad.cmake index b456c724cc6..4879e721ae3 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -78,6 +78,14 @@ if (OS MATCHES "Linux" AND ("$ENV{CC}" MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*")) if (ARCH MATCHES "amd64|x86_64") + # NOTE: right now musl is not ready, since unwind is too slow with it + # + # FWIW the following had been tried: + # - update musl + # - compile musl with debug + # - compile musl with debug and -fasynchronous-unwind-tables + # + # But none of this changes anything so far. set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "") elseif (ARCH MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "") diff --git a/README.md b/README.md index f0a7dbe2408..4ff9b9caaa1 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,8 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -* [**v23.7 Release Webinar**](https://clickhouse.com/company/events/v23-7-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-07) - Jul 27 - 23.7 is rapidly approaching. 
Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18 -* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19 -* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20 -* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27 +* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8 * [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12 Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. diff --git a/SECURITY.md b/SECURITY.md index 4ba5f13d09c..c3e36b90367 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,17 +13,14 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.8 | ✔️ | +| 23.7 | ✔️ | | 23.6 | ✔️ | -| 23.5 | ✔️ | -| 23.4 | ✔️ | +| 23.5 | ❌ | +| 23.4 | ❌ | | 23.3 | ✔️ | | 23.2 | ❌ | | 23.1 | ❌ | -| 22.12 | ❌ | -| 22.11 | ❌ | -| 22.10 | ❌ | -| 22.9 | ❌ | -| 22.8 | ✔️ | | 22.* | ❌ | | 21.* | ❌ | | 20.* | ❌ | diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index 0d1f8ae0a40..e3208f16a75 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -3,6 +3,7 @@ #include #include + template concept is_enum = std::is_enum_v; namespace detail diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 4c6d97b4444..0b43be38149 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -7,8 +7,6 @@ #include #include -#include - #define JSON_MAX_DEPTH 100 diff --git a/base/base/StringRef.h b/base/base/StringRef.h index f428b7c747f..9a97b2ea5cc 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -8,8 +8,10 @@ #include #include +#include #include #include +#include #include @@ -28,6 +30,11 @@ #define CRC_INT __crc32cd #endif +#if defined(__aarch64__) && defined(__ARM_NEON) + #include + #pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + /** * The std::string_view-like container to avoid creating strings to find substrings in the hash table. 
@@ -73,14 +80,14 @@ using StringRefs = std::vector; * For more information, see hash_map_string_2.cpp */ -inline bool compareSSE2(const char * p1, const char * p2) +inline bool compare8(const char * p1, const char * p2) { return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(p1)), _mm_loadu_si128(reinterpret_cast(p2)))); } -inline bool compareSSE2x4(const char * p1, const char * p2) +inline bool compare64(const char * p1, const char * p2) { return 0xFFFF == _mm_movemask_epi8( _mm_and_si128( @@ -100,7 +107,30 @@ inline bool compareSSE2x4(const char * p1, const char * p2) _mm_loadu_si128(reinterpret_cast(p2) + 3))))); } -inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) +#elif defined(__aarch64__) && defined(__ARM_NEON) + +inline bool compare8(const char * p1, const char * p2) +{ + uint64_t mask = getNibbleMask(vceqq_u8( + vld1q_u8(reinterpret_cast(p1)), vld1q_u8(reinterpret_cast(p2)))); + return 0xFFFFFFFFFFFFFFFF == mask; +} + +inline bool compare64(const char * p1, const char * p2) +{ + uint64_t mask = getNibbleMask(vandq_u8( + vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast(p1)), vld1q_u8(reinterpret_cast(p2))), + vceqq_u8(vld1q_u8(reinterpret_cast(p1 + 16)), vld1q_u8(reinterpret_cast(p2 + 16)))), + vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast(p1 + 32)), vld1q_u8(reinterpret_cast(p2 + 32))), + vceqq_u8(vld1q_u8(reinterpret_cast(p1 + 48)), vld1q_u8(reinterpret_cast(p2 + 48)))))); + return 0xFFFFFFFFFFFFFFFF == mask; +} + +#endif + +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + +inline bool memequalWide(const char * p1, const char * p2, size_t size) { /** The order of branches and the trick with overlapping comparisons * are the same as in memcpy implementation. @@ -137,7 +167,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) while (size >= 64) { - if (compareSSE2x4(p1, p2)) + if (compare64(p1, p2)) { p1 += 64; p2 += 64; @@ -149,17 +179,16 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) switch (size / 16) { - case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]]; - case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]]; - case 1: if (!compareSSE2(p1, p2)) return false; + case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]]; + case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]]; + case 1: if (!compare8(p1, p2)) return false; } - return compareSSE2(p1 + size - 16, p2 + size - 16); + return compare8(p1 + size - 16, p2 + size - 16); } #endif - inline bool operator== (StringRef lhs, StringRef rhs) { if (lhs.size != rhs.size) @@ -168,8 +197,8 @@ inline bool operator== (StringRef lhs, StringRef rhs) if (lhs.size == 0) return true; -#if defined(__SSE2__) - return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + return memequalWide(lhs.data, rhs.data, lhs.size); #else return 0 == memcmp(lhs.data, rhs.data, lhs.size); #endif @@ -274,6 +303,8 @@ struct CRC32Hash if (size == 0) return 0; + chassert(pos); + if (size < 8) { return static_cast(hashLessThan8(x.data, x.size)); diff --git a/base/base/defines.h b/base/base/defines.h index 6abf8155b95..ee29ecf6118 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -115,8 +115,15 @@ /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) #if defined(ABORT_ON_LOGICAL_ERROR) + // clang-format off + #include + 
namespace DB + { + void abortOnFailedAssertion(const String & description); + } #define chassert(x) static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(#x) #define UNREACHABLE() abort() + // clang-format off #else /// Here sizeof() trick is used to suppress unused warning for result, /// since simple "(void)x" will evaluate the expression, while diff --git a/base/base/simd.h b/base/base/simd.h new file mode 100644 index 00000000000..3283c40971c --- /dev/null +++ b/base/base/simd.h @@ -0,0 +1,14 @@ +#pragma once + +#if defined(__aarch64__) && defined(__ARM_NEON) + +# include +# pragma clang diagnostic ignored "-Wreserved-identifier" + +/// Returns a 64 bit mask of nibbles (4 bits for each byte). +inline uint64_t getNibbleMask(uint8x16_t res) +{ + return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0); +} + +#endif diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 411841e6d9f..fc4e9e551ca 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -12,7 +12,6 @@ #include #include -#include #include // NOLINTBEGIN(*) @@ -22,6 +21,7 @@ #define CONSTEXPR_FROM_DOUBLE constexpr using FromDoubleIntermediateType = long double; #else +#include /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` #define CONSTEXPR_FROM_DOUBLE using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; diff --git a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp index 4caf097c28a..1c5555f8cf3 100644 --- a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp +++ b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp @@ -19,7 +19,6 @@ #include "Poco/UTF16Encoding.h" #include "Poco/Buffer.h" #include "Poco/Exception.h" -#include using Poco::Buffer; diff --git a/base/poco/Data/include/Poco/Data/TypeHandler.h b/base/poco/Data/include/Poco/Data/TypeHandler.h index 34f88e986f7..e7633de7018 100644 --- a/base/poco/Data/include/Poco/Data/TypeHandler.h +++ b/base/poco/Data/include/Poco/Data/TypeHandler.h @@ -97,7 +97,7 @@ namespace Data /// /// static void extract(std::size_t pos, Person& obj, const Person& defVal, AbstractExtractor::Ptr pExt) /// { - /// // defVal is the default person we should use if we encunter NULL entries, so we take the individual fields + /// // defVal is the default person we should use if we encounter NULL entries, so we take the individual fields /// // as defaults. You can do more complex checking, ie return defVal if only one single entry of the fields is null etc... /// poco_assert_dbg (!pExt.isNull()); /// std::string lastName; diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index e8f04888ab4..282c7fb5fd1 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -67,6 +67,8 @@ public: Message( const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {}); + Message( + std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str); /// Creates a Message with the given source, text, priority, /// source file path and line. 
/// diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index 1880af4ccd2..30654504e0b 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri); + explicit URI(const std::string & uri, bool enable_url_encoding = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -350,6 +350,10 @@ protected: static const std::string ILLEGAL; private: + void encodePath(std::string & encodedStr) const; + void decodePath(const std::string & encodedStr); + + std::string _scheme; std::string _userInfo; std::string _host; @@ -357,6 +361,8 @@ private: std::string _path; std::string _query; std::string _fragment; + + bool _enable_url_encoding = true; }; diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp index 663c96e47a2..54118cc0fc5 100644 --- a/base/poco/Foundation/src/Message.cpp +++ b/base/poco/Foundation/src/Message.cpp @@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr } +Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str): + _source(std::move(source)), + _text(std::move(text)), + _prio(prio), + _tid(0), + _file(file), + _line(line), + _pMap(0), + _fmt_str(fmt_str) +{ + init(); +} + Message::Message(const Message& msg): _source(msg._source), _text(msg._text), diff --git a/base/poco/Foundation/src/Task.cpp b/base/poco/Foundation/src/Task.cpp index a850ae37eff..4303d50d6e8 100644 --- a/base/poco/Foundation/src/Task.cpp +++ b/base/poco/Foundation/src/Task.cpp @@ -16,7 +16,6 @@ #include "Poco/TaskManager.h" #include "Poco/Exception.h" -#include #include diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 5543e02b279..41e331bb080 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -36,8 +36,8 @@ URI::URI(): } -URI::URI(const std::string& uri): - _port(0) +URI::URI(const std::string& uri, bool enable_url_encoding): + _port(0), _enable_url_encoding(enable_url_encoding) { parse(uri); } @@ -107,7 +107,8 @@ URI::URI(const URI& uri): _port(uri._port), _path(uri._path), _query(uri._query), - _fragment(uri._fragment) + _fragment(uri._fragment), + _enable_url_encoding(uri._enable_url_encoding) { } @@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _port(baseURI._port), _path(baseURI._path), _query(baseURI._query), - _fragment(baseURI._fragment) + _fragment(baseURI._fragment), + _enable_url_encoding(baseURI._enable_url_encoding) { resolve(relativeURI); } @@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; + _enable_url_encoding = uri._enable_url_encoding; } return *this; } @@ -181,6 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); + std::swap(_enable_url_encoding, uri._enable_url_encoding); } @@ -201,7 +205,7 @@ std::string URI::toString() const std::string uri; if (isRelative()) { - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else { @@ -217,7 +221,7 @@ std::string URI::toString() const { if (!auth.empty() && _path[0] != '/') uri += '/'; - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else if (!_query.empty() || !_fragment.empty()) { @@ -313,7 +317,7 @@ 
void URI::setAuthority(const std::string& authority) void URI::setPath(const std::string& path) { _path.clear(); - decode(path, _path); + decodePath(path); } @@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc) std::string URI::getPathEtc() const { std::string pathEtc; - encode(_path, RESERVED_PATH, pathEtc); + encodePath(pathEtc); if (!_query.empty()) { pathEtc += '?'; @@ -436,7 +440,7 @@ std::string URI::getPathEtc() const std::string URI::getPathAndQuery() const { std::string pathAndQuery; - encode(_path, RESERVED_PATH, pathAndQuery); + encodePath(pathAndQuery); if (!_query.empty()) { pathAndQuery += '?'; @@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa } } +void URI::encodePath(std::string & encodedStr) const +{ + if (_enable_url_encoding) + encode(_path, RESERVED_PATH, encodedStr); + else + encodedStr = _path; +} + +void URI::decodePath(const std::string & encodedStr) +{ + if (_enable_url_encoding) + decode(encodedStr, _path); + else + _path = encodedStr; +} bool URI::isWellKnownPort() const { @@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it { std::string path; while (it != end && *it != '?' && *it != '#') path += *it++; - decode(path, _path); + decodePath(path); } diff --git a/base/poco/JSON/src/Object.cpp b/base/poco/JSON/src/Object.cpp index 7fca65c5b01..b041f570934 100644 --- a/base/poco/JSON/src/Object.cpp +++ b/base/poco/JSON/src/Object.cpp @@ -14,7 +14,6 @@ #include "Poco/JSON/Object.h" #include -#include using Poco::Dynamic::Var; diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index c5697b556d1..2712c0c452e 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -26,7 +26,6 @@ #include "Poco/CountingStream.h" #include "Poco/RegularExpression.h" #include -#include using Poco::NumberFormatter; diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h index 65917ac9dd4..c19eecf5c73 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h @@ -146,7 +146,7 @@ namespace Net std::string cipherList; /// Specifies the supported ciphers in OpenSSL notation. - /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH". + /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH". std::string dhParamsFile; /// Specifies a file containing Diffie-Hellman parameters. @@ -172,7 +172,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. @@ -200,7 +200,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. 
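Note on the `Poco::URI` change above: the new `enable_url_encoding` constructor argument routes all path handling through `encodePath()`/`decodePath()`, so when it is set to `false` the path is stored and emitted verbatim instead of being percent-decoded on parse and re-encoded on output. Below is a minimal sketch of how a caller might exercise this, assuming the patched Poco headers are on the include path; the example URL and the expected outputs in the comments are illustrative, not taken from the patch.

```cpp
#include <iostream>
#include <string>
#include <Poco/URI.h>

int main()
{
    const std::string raw = "http://example.com/a%20dir/file%2Bname";

    // Default behaviour (enable_url_encoding = true): the path is percent-decoded
    // when parsed and re-encoded by toString()/getPathEtc(), as before this patch.
    Poco::URI decoded(raw);

    // New opt-out: the path bypasses encode()/decode() and is kept exactly as given.
    Poco::URI verbatim(raw, /*enable_url_encoding=*/ false);

    std::cout << decoded.getPath()  << '\n';  // expected: "/a dir/file+name"
    std::cout << verbatim.getPath() << '\n';  // expected: "/a%20dir/file%2Bname"
    return 0;
}
```

The copy constructor, assignment operator, and `swap()` shown in the hunks above all propagate `_enable_url_encoding`, so URIs derived from an existing one keep whatever behaviour the original was constructed with.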
diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index 21a1ed685e5..e4037c87927 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -76,7 +76,7 @@ namespace Net /// none|relaxed|strict|once /// 1..9 /// true|false - /// ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH + /// ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH /// true|false /// /// KeyFileHandler diff --git a/base/poco/NetSSL_OpenSSL/src/Context.cpp b/base/poco/NetSSL_OpenSSL/src/Context.cpp index ca220c40a33..d0bab902b89 100644 --- a/base/poco/NetSSL_OpenSSL/src/Context.cpp +++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp @@ -41,7 +41,7 @@ Context::Params::Params(): verificationMode(VERIFY_RELAXED), verificationDepth(9), loadDefaultCAs(false), - cipherList("ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH") + cipherList("ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH") { } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 821b7b46855..5c04bf5a77a 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54476) +SET(VERSION_REVISION 54478) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df) -SET(VERSION_DESCRIBE v23.7.1.1-testing) -SET(VERSION_STRING 23.7.1.1) +SET(VERSION_GITHASH ebc7d9a9f3b40be89e0b3e738b35d394aabeea3e) +SET(VERSION_DESCRIBE v23.9.1.1-testing) +SET(VERSION_STRING 23.9.1.1) # end of autochange diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 01c4a8b16e9..fbe7f96cea3 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -4,10 +4,19 @@ macro(add_glob cur_list) endmacro() macro(add_headers_and_sources prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) + add_glob(${prefix}_headers ${common_path}/*.h) + add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c) endmacro() macro(add_headers_only prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) + add_glob(${prefix}_headers ${common_path}/*.h) +endmacro() + +macro(extract_into_parent_list src_list dest_list) + list(REMOVE_ITEM ${src_list} ${ARGN}) + get_filename_component(__dir_name ${CMAKE_CURRENT_SOURCE_DIR} NAME) + foreach(file IN ITEMS ${ARGN}) + list(APPEND ${dest_list} ${__dir_name}/${file}) + endforeach() + set(${dest_list} "${${dest_list}}" PARENT_SCOPE) endmacro() diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake deleted file mode 100644 index e5428c24939..00000000000 --- a/cmake/embed_binary.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# Embed a set of resource files into a resulting object file. -# -# Signature: `clickhouse_embed_binaries(TARGET RESOURCE_DIR RESOURCES ...) -# -# This will generate a static library target named ``, which contains the contents of -# each `` file. The files should be located in ``. defaults to -# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty. -# -# Each resource will result in three symbols in the final archive, based on the name ``. -# These are: -# 1. 
`_binary__start`: Points to the start of the binary data from ``. -# 2. `_binary__end`: Points to the end of the binary data from ``. -# 2. `_binary__size`: Points to the size of the binary data from ``. -# -# `` is a normalized name derived from ``, by replacing the characters "./-" with -# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated -# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`. -macro(clickhouse_embed_binaries) - set(one_value_args TARGET RESOURCE_DIR) - set(resources RESOURCES) - cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN}) - - if (NOT DEFINED EMBED_TARGET) - message(FATAL_ERROR "A target name must be provided for embedding binary resources into") - endif() - - if (NOT DEFINED EMBED_RESOURCE_DIR) - set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") - endif() - - list(LENGTH EMBED_RESOURCES N_RESOURCES) - if (N_RESOURCES LESS 1) - message(FATAL_ERROR "The list of binary resources to embed may not be empty") - endif() - - add_library("${EMBED_TARGET}" STATIC) - set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C) - - set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in") - - foreach(RESOURCE_FILE ${EMBED_RESOURCES}) - set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S") - set(BINARY_FILE_NAME "${RESOURCE_FILE}") - - # Normalize the name of the resource. - string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex - string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}") - - # Generate the configured assembly file in the output directory. - configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY) - - # Set the include directory for relative paths specified for `.incbin` directive. 
- set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}") - - target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}") - set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}") - endforeach() -endmacro() diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index acc38b6fa2a..28ccb62e10c 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -47,7 +47,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE set (PARALLEL_LINK_JOBS 2) endif() -message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).") +message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index d42d587303a..56a663a708e 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -35,10 +35,6 @@ find_package(Threads REQUIRED) include (cmake/unwind.cmake) include (cmake/cxx.cmake) -# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies -# which are ok with static libraries but not with dynamic ones -link_libraries(global-group) - if (NOT OS_ANDROID) if (NOT USE_MUSL) # Our compatibility layer doesn't build under Android, many errors in musl. @@ -47,6 +43,8 @@ if (NOT OS_ANDROID) add_subdirectory(base/harmful) endif () +link_libraries(global-group) + target_link_libraries(global-group INTERFACE -Wl,--start-group $ diff --git a/cmake/linux/toolchain-ppc64le.cmake b/cmake/linux/toolchain-ppc64le.cmake index 8eb2aab34e9..c46ea954b71 100644 --- a/cmake/linux/toolchain-ppc64le.cmake +++ b/cmake/linux/toolchain-ppc64le.cmake @@ -5,9 +5,9 @@ set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") set (CMAKE_SYSTEM_PROCESSOR "ppc64le") -set (CMAKE_C_COMPILER_TARGET "ppc64le-linux-gnu") -set (CMAKE_CXX_COMPILER_TARGET "ppc64le-linux-gnu") -set (CMAKE_ASM_COMPILER_TARGET "ppc64le-linux-gnu") +set (CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu") +set (CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu") +set (CMAKE_ASM_COMPILER_TARGET "powerpc64le-linux-gnu") # Will be changed later, but somehow needed to be set here. 
set (CMAKE_AR "ar") diff --git a/cmake/linux/toolchain-s390x.cmake b/cmake/linux/toolchain-s390x.cmake index b85d4253b89..945eb9affa4 100644 --- a/cmake/linux/toolchain-s390x.cmake +++ b/cmake/linux/toolchain-s390x.cmake @@ -20,6 +20,9 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/s390x-linux-gnu/libc") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") +set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") +set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) diff --git a/cmake/split_debug_symbols.cmake b/cmake/split_debug_symbols.cmake index a9c2158359a..d6821eb6c48 100644 --- a/cmake/split_debug_symbols.cmake +++ b/cmake/split_debug_symbols.cmake @@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols) # Splits debug symbols into separate file, leaves the binary untouched: COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" - # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check: - COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check. + # Also, after we disabled the export of symbols for dynamic linking, we still to keep a static symbol table for good stack traces. + COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" # Associate stripped binary with debug symbols: COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM diff --git a/cmake/target.cmake b/cmake/target.cmake index ffab08f1103..e4a2f060f1e 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -19,6 +19,19 @@ else () message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_NAME} is not supported") endif () +# Since we always use toolchain files to generate hermetic builds, cmake will +# always think it's a cross-compilation, See +# https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html +# +# This will slow down cmake configuration and compilation. For instance, LLVM +# will try to configure NATIVE LLVM targets with all tests enabled (You'll see +# Building native llvm-tblgen...). +# +# Here, we set it manually by checking the system name and processor. 
+if (${CMAKE_SYSTEM_NAME} STREQUAL ${CMAKE_HOST_SYSTEM_NAME} AND ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR}) + set (CMAKE_CROSSCOMPILING 0) +endif () + if (CMAKE_CROSSCOMPILING) if (OS_DARWIN) # FIXME: broken dependencies @@ -47,7 +60,7 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_RUST OFF CACHE INTERNAL "") elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_RUST OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0f68c0cbc7c..f9c3fddff40 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf) add_contrib (openldap-cmake openldap) add_contrib (grpc-cmake grpc) add_contrib (msgpack-c-cmake msgpack-c) +add_contrib (libarchive-cmake libarchive) add_contrib (corrosion-cmake corrosion) @@ -134,10 +135,8 @@ add_contrib (aws-cmake aws-cmake ) -add_contrib (base64-cmake base64) -if (NOT ARCH_S390X) +add_contrib (aklomp-base64-cmake aklomp-base64) add_contrib (simdjson-cmake simdjson) -endif() add_contrib (rapidjson-cmake rapidjson) add_contrib (fastops-cmake fastops) add_contrib (libuv-cmake libuv) @@ -164,13 +163,13 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) +add_contrib (incbin-cmake incbin) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (lemmagen-c-cmake lemmagen-c) - add_contrib (nlp-data-cmake nlp-data) add_contrib (cld2-cmake cld2) endif() @@ -195,6 +194,17 @@ if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) endif() add_contrib (annoy-cmake annoy) + +option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES}) +if (ENABLE_USEARCH) + add_contrib (FP16-cmake FP16) + add_contrib (robin-map-cmake robin-map) + add_contrib (SimSIMD-cmake SimSIMD) + add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD +else () + message(STATUS "Not using USearch") +endif () + add_contrib (xxHash-cmake xxHash) add_contrib (libbcrypt-cmake libbcrypt) diff --git a/contrib/FP16 b/contrib/FP16 new file mode 160000 index 00000000000..0a92994d729 --- /dev/null +++ b/contrib/FP16 @@ -0,0 +1 @@ +Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91 diff --git a/contrib/FP16-cmake/CMakeLists.txt b/contrib/FP16-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f82ad705dcc --- /dev/null +++ b/contrib/FP16-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/SimSIMD b/contrib/SimSIMD new file mode 160000 index 00000000000..de2cb75b9e9 --- /dev/null +++ b/contrib/SimSIMD @@ -0,0 +1 @@ +Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf diff --git a/contrib/SimSIMD-cmake/CMakeLists.txt b/contrib/SimSIMD-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f82ad705dcc --- /dev/null +++ b/contrib/SimSIMD-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/aklomp-base64 b/contrib/aklomp-base64 new file mode 160000 index 00000000000..e77bd70bdd8 --- /dev/null +++ b/contrib/aklomp-base64 @@ -0,0 +1 @@ +Subproject commit e77bd70bdd860c52c561568cffb251d88bba064c diff --git a/contrib/aklomp-base64-cmake/.gitignore 
b/contrib/aklomp-base64-cmake/.gitignore new file mode 100644 index 00000000000..0e56cf2f8c1 --- /dev/null +++ b/contrib/aklomp-base64-cmake/.gitignore @@ -0,0 +1 @@ +config.h diff --git a/contrib/aklomp-base64-cmake/CMakeLists.txt b/contrib/aklomp-base64-cmake/CMakeLists.txt new file mode 100644 index 00000000000..4b988fad860 --- /dev/null +++ b/contrib/aklomp-base64-cmake/CMakeLists.txt @@ -0,0 +1,68 @@ +option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_BASE64) + message(STATUS "Not using base64") + return() +endif() + +SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aklomp-base64") + +if (ARCH_AMD64) + # These defines enable/disable SIMD codecs in base64's runtime codec dispatch. + # We don't want to limit ourselves --> enable all. + set(HAVE_SSSE3 1) + set(HAVE_SSE41 1) + set(HAVE_SSE42 1) + set(HAVE_AVX 1) + set(HAVE_AVX2 1) + set(HAVE_AVX512 1) +endif () + +if (ARCH_AARCH64) + # The choice of HAVE_NEON* depends on the target machine because base64 provides + # no runtime dispatch on ARM. NEON is only mandatory with the normal build profile. + if(NOT NO_ARMV81_OR_HIGHER) + set(HAVE_NEON64 1) + set(HAVE_NEON32 0) + endif () +endif () + +configure_file(config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) + +add_library(_base64 + "${LIBRARY_DIR}/lib/lib.c" + "${LIBRARY_DIR}/lib/codec_choose.c" + + "${LIBRARY_DIR}/lib/tables/tables.c" + "${LIBRARY_DIR}/lib/tables/table_dec_32bit.h" + "${LIBRARY_DIR}/lib/tables/table_enc_12bit.h" + + "${LIBRARY_DIR}/lib/codecs.h" + + "${CMAKE_CURRENT_BINARY_DIR}/config.h" + + "${LIBRARY_DIR}/lib/arch/generic/codec.c" + "${LIBRARY_DIR}/lib/arch/ssse3/codec.c" + "${LIBRARY_DIR}/lib/arch/sse41/codec.c" + "${LIBRARY_DIR}/lib/arch/sse42/codec.c" + "${LIBRARY_DIR}/lib/arch/avx/codec.c" + "${LIBRARY_DIR}/lib/arch/avx2/codec.c" + "${LIBRARY_DIR}/lib/arch/avx512/codec.c" + + "${LIBRARY_DIR}/lib/arch/neon32/codec.c" + "${LIBRARY_DIR}/lib/arch/neon64/codec.c" +) + +if (ARCH_AMD64) + set_source_files_properties(${LIBRARY_DIR}/lib/arch/ssse3/codec.c PROPERTIES COMPILE_FLAGS "-mssse3") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse41/codec.c PROPERTIES COMPILE_FLAGS "-msse4.1") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse42/codec.c PROPERTIES COMPILE_FLAGS "-msse4.2") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx/codec.c PROPERTIES COMPILE_FLAGS "-mavx") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx2/codec.c PROPERTIES COMPILE_FLAGS "-mavx2") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx512/codec.c PROPERTIES COMPILE_FLAGS "-mavx512vl -mavx512vbmi") +endif() + +target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR}/include) +target_include_directories(_base64 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + +add_library(ch_contrib::base64 ALIAS _base64) diff --git a/contrib/aklomp-base64-cmake/config.h.in b/contrib/aklomp-base64-cmake/config.h.in new file mode 100644 index 00000000000..4dc84632b30 --- /dev/null +++ b/contrib/aklomp-base64-cmake/config.h.in @@ -0,0 +1,9 @@ +#cmakedefine01 HAVE_SSSE3 +#cmakedefine01 HAVE_SSE41 +#cmakedefine01 HAVE_SSE42 +#cmakedefine01 HAVE_AVX +#cmakedefine01 HAVE_AVX2 +#cmakedefine01 HAVE_AVX512 + +#cmakedefine01 HAVE_NEON32 +#cmakedefine01 HAVE_NEON64 diff --git a/contrib/arrow b/contrib/arrow index 1f1b3d35fb6..1d93838f69a 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f +Subproject commit 1d93838f69a802639ca144ea5704a98e2481810d diff --git 
a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46b86cb4ddb..02e809c560f 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -334,20 +334,36 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/api_vector.cc" "${LIBRARY_DIR}/compute/cast.cc" "${LIBRARY_DIR}/compute/exec.cc" + "${LIBRARY_DIR}/compute/exec/accumulation_queue.cc" + "${LIBRARY_DIR}/compute/exec/accumulation_queue.h" + "${LIBRARY_DIR}/compute/exec/aggregate.cc" "${LIBRARY_DIR}/compute/exec/aggregate_node.cc" + "${LIBRARY_DIR}/compute/exec/asof_join_node.cc" + "${LIBRARY_DIR}/compute/exec/bloom_filter.cc" "${LIBRARY_DIR}/compute/exec/exec_plan.cc" "${LIBRARY_DIR}/compute/exec/expression.cc" "${LIBRARY_DIR}/compute/exec/filter_node.cc" - "${LIBRARY_DIR}/compute/exec/project_node.cc" - "${LIBRARY_DIR}/compute/exec/source_node.cc" - "${LIBRARY_DIR}/compute/exec/sink_node.cc" + "${LIBRARY_DIR}/compute/exec/hash_join.cc" + "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" + "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" + "${LIBRARY_DIR}/compute/exec/key_hash.cc" + "${LIBRARY_DIR}/compute/exec/key_map.cc" + "${LIBRARY_DIR}/compute/exec/map_node.cc" + "${LIBRARY_DIR}/compute/exec/options.cc" "${LIBRARY_DIR}/compute/exec/order_by_impl.cc" "${LIBRARY_DIR}/compute/exec/partition_util.cc" + "${LIBRARY_DIR}/compute/exec/project_node.cc" + "${LIBRARY_DIR}/compute/exec/query_context.cc" + "${LIBRARY_DIR}/compute/exec/sink_node.cc" + "${LIBRARY_DIR}/compute/exec/source_node.cc" + "${LIBRARY_DIR}/compute/exec/swiss_join.cc" + "${LIBRARY_DIR}/compute/exec/task_util.cc" + "${LIBRARY_DIR}/compute/exec/tpch_node.cc" + "${LIBRARY_DIR}/compute/exec/union_node.cc" + "${LIBRARY_DIR}/compute/exec/util.cc" "${LIBRARY_DIR}/compute/function.cc" "${LIBRARY_DIR}/compute/function_internal.cc" "${LIBRARY_DIR}/compute/kernel.cc" - "${LIBRARY_DIR}/compute/light_array.cc" - "${LIBRARY_DIR}/compute/registry.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc" @@ -355,49 +371,43 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc" "${LIBRARY_DIR}/compute/kernels/codegen_internal.cc" "${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc" + "${LIBRARY_DIR}/compute/kernels/row_encoder.cc" "${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc" "${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_compare.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" "${LIBRARY_DIR}/compute/kernels/scalar_nested.cc" "${LIBRARY_DIR}/compute/kernels/scalar_random.cc" "${LIBRARY_DIR}/compute/kernels/scalar_round.cc" "${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc" "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc" - 
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc" "${LIBRARY_DIR}/compute/kernels/util_internal.cc" "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc" "${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc" "${LIBRARY_DIR}/compute/kernels/vector_hash.cc" - "${LIBRARY_DIR}/compute/kernels/vector_rank.cc" - "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc" "${LIBRARY_DIR}/compute/kernels/vector_nested.cc" + "${LIBRARY_DIR}/compute/kernels/vector_rank.cc" "${LIBRARY_DIR}/compute/kernels/vector_replace.cc" + "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc" "${LIBRARY_DIR}/compute/kernels/vector_selection.cc" "${LIBRARY_DIR}/compute/kernels/vector_sort.cc" - "${LIBRARY_DIR}/compute/kernels/row_encoder.cc" - "${LIBRARY_DIR}/compute/exec/union_node.cc" - "${LIBRARY_DIR}/compute/exec/key_hash.cc" - "${LIBRARY_DIR}/compute/exec/key_map.cc" - "${LIBRARY_DIR}/compute/exec/util.cc" - "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" - "${LIBRARY_DIR}/compute/exec/hash_join.cc" - "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" - "${LIBRARY_DIR}/compute/exec/task_util.cc" + "${LIBRARY_DIR}/compute/light_array.cc" + "${LIBRARY_DIR}/compute/registry.cc" + "${LIBRARY_DIR}/compute/row/compare_internal.cc" "${LIBRARY_DIR}/compute/row/encode_internal.cc" "${LIBRARY_DIR}/compute/row/grouper.cc" - "${LIBRARY_DIR}/compute/row/compare_internal.cc" "${LIBRARY_DIR}/compute/row/row_internal.cc" "${LIBRARY_DIR}/ipc/dictionary.cc" @@ -502,9 +512,10 @@ target_include_directories(_parquet SYSTEM BEFORE "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src") target_link_libraries(_parquet - PUBLIC _arrow - PRIVATE + PUBLIC + _arrow ch_contrib::thrift + PRIVATE boost::headers_only boost::regex OpenSSL::Crypto OpenSSL::SSL) diff --git a/contrib/base64 b/contrib/base64 deleted file mode 160000 index 9499e0c4945..00000000000 --- a/contrib/base64 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9499e0c4945589973b9ea1bc927377cfbc84aa46 diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt deleted file mode 100644 index 333e0a96a0b..00000000000 --- a/contrib/base64-cmake/CMakeLists.txt +++ /dev/null @@ -1,60 +0,0 @@ -if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_S390X) - option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) -elseif(ENABLE_BASE64) - message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64") -endif() - -if (NOT ENABLE_BASE64) - message(STATUS "Not using base64") - return() -endif() - -SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64") - -add_library(_base64_scalar OBJECT "${LIBRARY_DIR}/turbob64c.c" "${LIBRARY_DIR}/turbob64d.c") -add_library(_base64_ssse3 OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This file also contains code for ARM NEON - -if (ARCH_AMD64) - add_library(_base64_avx OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This is not a mistake. One file is compiled twice. 
- add_library(_base64_avx2 OBJECT "${LIBRARY_DIR}/turbob64avx2.c") -endif () - -target_compile_options(_base64_scalar PRIVATE -falign-loops) - -if (ARCH_AMD64) - target_compile_options(_base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops) - target_compile_options(_base64_avx PRIVATE -falign-loops -mavx) - target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2) -else () - if (ARCH_PPC64LE) - target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops) - else() - target_compile_options(_base64_ssse3 PRIVATE -falign-loops) - endif() -endif () - -if (ARCH_AMD64) - add_library(_base64 - $ - $ - $ - $) -else () - add_library(_base64 - $ - $) -endif () - -target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR}) - -if (XCODE OR XCODE_VERSION) - # https://gitlab.kitware.com/cmake/cmake/issues/17457 - # Some native build systems may not like targets that have only object files, so consider adding at least one real source file - # This applies to Xcode. - if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "") - endif () - target_sources(_base64 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") -endif () - -add_library(ch_contrib::base64 ALIAS _base64) diff --git a/contrib/boost b/contrib/boost index aec12eea7fc..ae94606a70f 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17 +Subproject commit ae94606a70f1e298ce2a5718db858079185c4d9c diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 6f9dce0b042..343e863e496 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -19,18 +19,22 @@ add_library (_boost_filesystem ${SRCS_FILESYSTEM}) add_library (boost::filesystem ALIAS _boost_filesystem) target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) -# headers-only +if (OS_LINUX) + target_compile_definitions (_boost_filesystem PRIVATE + BOOST_FILESYSTEM_HAS_POSIX_AT_APIS=1 + ) +endif () + +# headers-only add_library (_boost_headers_only INTERFACE) add_library (boost::headers_only ALIAS _boost_headers_only) target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) -# asio - target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1 - # Avoid using of deprecated in c++ > 17 std::result_of - BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 + BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 # Avoid using of deprecated in c++ > 17 std::result_of + BOOST_TIMER_ENABLE_DEPRECATED=1 # wordnet-blast (enabled via USE_NLP) uses Boost legacy timer classes ) # iostreams @@ -172,9 +176,9 @@ endif() # coroutine set (SRCS_COROUTINE - "${LIBRARY_DIR}/libs/coroutine/detail/coroutine_context.cpp" - "${LIBRARY_DIR}/libs/coroutine/exceptions.cpp" - "${LIBRARY_DIR}/libs/coroutine/posix/stack_traits.cpp" + "${LIBRARY_DIR}/libs/coroutine/src/detail/coroutine_context.cpp" + "${LIBRARY_DIR}/libs/coroutine/src/exceptions.cpp" + "${LIBRARY_DIR}/libs/coroutine/src/posix/stack_traits.cpp" ) add_library (_boost_coroutine ${SRCS_COROUTINE}) add_library (boost::coroutine ALIAS _boost_coroutine) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 10070fbd949..7161f743de1 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,4 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") set (SRCS @@ -23,12 +22,10 @@ if 
(OS_FREEBSD) endif () # Related to time_zones table: -# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build -# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX -# as the library that's built using embedded tzdata is also specific to OS_LINUX -set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") +# TimeZones.generated.cpp is autogenerated each time during a build +set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp") # remove existing copies so that its generated fresh on each build. -file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) +file(REMOVE ${TIMEZONES_FILE}) # get the list of timezones from tzdata shipped with cctz set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") @@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION) set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") -set(TIMEZONE_RESOURCE_FILES) - # each file in that dir (except of tab and localtime) store the info about timezone execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE TIMEZONES) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) +file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") +file(APPEND ${TIMEZONES_FILE} "#include \n") + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n") + MATH(EXPR COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" ) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") - list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") + file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n") + MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n") -clickhouse_embed_binaries( - TARGET tzdata - RESOURCE_DIR "${TZDIR}" - RESOURCES ${TIMEZONE_RESOURCE_FILES} -) -add_dependencies(_cctz tzdata) -target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") + +file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n") + +file(APPEND ${TIMEZONES_FILE} "#include \n\n") +file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" ) + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n") + MATH(EXPR COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} " return {};\n") +file(APPEND ${TIMEZONES_FILE} "}\n") + +add_library (tzdata ${TIMEZONES_FILE}) +target_link_libraries(tzdata ch_contrib::incbin) +target_link_libraries(_cctz tzdata) add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/contrib/cityhash102/include/city.h b/contrib/cityhash102/include/city.h index 87363d16444..c98eb7e3585 100644 --- a/contrib/cityhash102/include/city.h +++ b/contrib/cityhash102/include/city.h @@ -73,8 +73,8 @@ struct uint128 uint128() = default; uint128(uint64 low64_, uint64 
high64_) : low64(low64_), high64(high64_) {} - friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); } - friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); } + + friend auto operator<=>(const uint128 &, const uint128 &) = default; }; inline uint64 Uint128Low64(const uint128 & x) { return x.low64; } diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index ea8f191564d..8adc2c0b23a 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -17,20 +17,22 @@ endif() message(STATUS "Checking Rust toolchain for current target") -if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") +# See https://doc.rust-lang.org/nightly/rustc/platform-support.html + +if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") -endif() - -if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") -endif() - -if((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-apple-darwin") -endif() - -if((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") + set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") endif() if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") diff --git a/contrib/curl b/contrib/curl index b0edf0b7dae..eb3b049df52 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d +Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 70d9c2816dc..733865d5101 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -8,125 +8,122 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") set (SRCS - "${LIBRARY_DIR}/lib/fopen.c" - "${LIBRARY_DIR}/lib/noproxy.c" - "${LIBRARY_DIR}/lib/idn.c" - "${LIBRARY_DIR}/lib/cfilters.c" - "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/altsvc.c" + "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/asyn-thread.c" + "${LIBRARY_DIR}/lib/base64.c" + "${LIBRARY_DIR}/lib/bufq.c" + "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/cf-h1-proxy.c" "${LIBRARY_DIR}/lib/cf-haproxy.c" "${LIBRARY_DIR}/lib/cf-https-connect.c" - "${LIBRARY_DIR}/lib/file.c" - "${LIBRARY_DIR}/lib/timeval.c" - "${LIBRARY_DIR}/lib/base64.c" - "${LIBRARY_DIR}/lib/hostip.c" - "${LIBRARY_DIR}/lib/progress.c" - "${LIBRARY_DIR}/lib/formdata.c" - "${LIBRARY_DIR}/lib/cookie.c" - "${LIBRARY_DIR}/lib/http.c" - "${LIBRARY_DIR}/lib/sendf.c" - "${LIBRARY_DIR}/lib/url.c" - "${LIBRARY_DIR}/lib/dict.c" - "${LIBRARY_DIR}/lib/if2ip.c" - "${LIBRARY_DIR}/lib/speedcheck.c" - 
"${LIBRARY_DIR}/lib/ldap.c" - "${LIBRARY_DIR}/lib/version.c" - "${LIBRARY_DIR}/lib/getenv.c" - "${LIBRARY_DIR}/lib/escape.c" - "${LIBRARY_DIR}/lib/mprintf.c" - "${LIBRARY_DIR}/lib/telnet.c" - "${LIBRARY_DIR}/lib/netrc.c" - "${LIBRARY_DIR}/lib/getinfo.c" - "${LIBRARY_DIR}/lib/transfer.c" - "${LIBRARY_DIR}/lib/strcase.c" - "${LIBRARY_DIR}/lib/easy.c" - "${LIBRARY_DIR}/lib/curl_fnmatch.c" - "${LIBRARY_DIR}/lib/curl_log.c" - "${LIBRARY_DIR}/lib/fileinfo.c" - "${LIBRARY_DIR}/lib/krb5.c" - "${LIBRARY_DIR}/lib/memdebug.c" - "${LIBRARY_DIR}/lib/http_chunks.c" - "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/cfilters.c" + "${LIBRARY_DIR}/lib/conncache.c" "${LIBRARY_DIR}/lib/connect.c" - "${LIBRARY_DIR}/lib/llist.c" - "${LIBRARY_DIR}/lib/hash.c" - "${LIBRARY_DIR}/lib/multi.c" "${LIBRARY_DIR}/lib/content_encoding.c" - "${LIBRARY_DIR}/lib/share.c" - "${LIBRARY_DIR}/lib/http_digest.c" - "${LIBRARY_DIR}/lib/md4.c" - "${LIBRARY_DIR}/lib/md5.c" - "${LIBRARY_DIR}/lib/http_negotiate.c" - "${LIBRARY_DIR}/lib/inet_pton.c" - "${LIBRARY_DIR}/lib/strtoofft.c" - "${LIBRARY_DIR}/lib/strerror.c" - "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/cookie.c" + "${LIBRARY_DIR}/lib/curl_addrinfo.c" + "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/curl_endian.c" + "${LIBRARY_DIR}/lib/curl_fnmatch.c" + "${LIBRARY_DIR}/lib/curl_get_line.c" + "${LIBRARY_DIR}/lib/curl_gethostname.c" + "${LIBRARY_DIR}/lib/curl_gssapi.c" + "${LIBRARY_DIR}/lib/curl_memrchr.c" + "${LIBRARY_DIR}/lib/curl_multibyte.c" + "${LIBRARY_DIR}/lib/curl_ntlm_core.c" + "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" + "${LIBRARY_DIR}/lib/curl_path.c" + "${LIBRARY_DIR}/lib/curl_range.c" + "${LIBRARY_DIR}/lib/curl_rtmp.c" + "${LIBRARY_DIR}/lib/curl_sasl.c" + "${LIBRARY_DIR}/lib/curl_sspi.c" + "${LIBRARY_DIR}/lib/curl_threads.c" + "${LIBRARY_DIR}/lib/curl_trc.c" + "${LIBRARY_DIR}/lib/dict.c" + "${LIBRARY_DIR}/lib/doh.c" + "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/dynhds.c" + "${LIBRARY_DIR}/lib/easy.c" + "${LIBRARY_DIR}/lib/escape.c" + "${LIBRARY_DIR}/lib/file.c" + "${LIBRARY_DIR}/lib/fileinfo.c" + "${LIBRARY_DIR}/lib/fopen.c" + "${LIBRARY_DIR}/lib/formdata.c" + "${LIBRARY_DIR}/lib/getenv.c" + "${LIBRARY_DIR}/lib/getinfo.c" + "${LIBRARY_DIR}/lib/gopher.c" + "${LIBRARY_DIR}/lib/hash.c" + "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/hmac.c" "${LIBRARY_DIR}/lib/hostasyn.c" + "${LIBRARY_DIR}/lib/hostip.c" "${LIBRARY_DIR}/lib/hostip4.c" "${LIBRARY_DIR}/lib/hostip6.c" "${LIBRARY_DIR}/lib/hostsyn.c" + "${LIBRARY_DIR}/lib/hsts.c" + "${LIBRARY_DIR}/lib/http.c" + "${LIBRARY_DIR}/lib/http2.c" + "${LIBRARY_DIR}/lib/http_aws_sigv4.c" + "${LIBRARY_DIR}/lib/http_chunks.c" + "${LIBRARY_DIR}/lib/http_digest.c" + "${LIBRARY_DIR}/lib/http_negotiate.c" + "${LIBRARY_DIR}/lib/http_ntlm.c" + "${LIBRARY_DIR}/lib/http_proxy.c" + "${LIBRARY_DIR}/lib/idn.c" + "${LIBRARY_DIR}/lib/if2ip.c" + "${LIBRARY_DIR}/lib/imap.c" "${LIBRARY_DIR}/lib/inet_ntop.c" + "${LIBRARY_DIR}/lib/inet_pton.c" + "${LIBRARY_DIR}/lib/krb5.c" + "${LIBRARY_DIR}/lib/ldap.c" + "${LIBRARY_DIR}/lib/llist.c" + "${LIBRARY_DIR}/lib/md4.c" + "${LIBRARY_DIR}/lib/md5.c" + "${LIBRARY_DIR}/lib/memdebug.c" + "${LIBRARY_DIR}/lib/mime.c" + "${LIBRARY_DIR}/lib/mprintf.c" + "${LIBRARY_DIR}/lib/mqtt.c" + "${LIBRARY_DIR}/lib/multi.c" + "${LIBRARY_DIR}/lib/netrc.c" + "${LIBRARY_DIR}/lib/nonblock.c" + "${LIBRARY_DIR}/lib/noproxy.c" + "${LIBRARY_DIR}/lib/openldap.c" "${LIBRARY_DIR}/lib/parsedate.c" + "${LIBRARY_DIR}/lib/pingpong.c" + "${LIBRARY_DIR}/lib/pop3.c" + 
"${LIBRARY_DIR}/lib/progress.c" + "${LIBRARY_DIR}/lib/psl.c" + "${LIBRARY_DIR}/lib/rand.c" + "${LIBRARY_DIR}/lib/rename.c" + "${LIBRARY_DIR}/lib/rtsp.c" "${LIBRARY_DIR}/lib/select.c" - "${LIBRARY_DIR}/lib/splay.c" - "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/sendf.c" + "${LIBRARY_DIR}/lib/setopt.c" + "${LIBRARY_DIR}/lib/sha256.c" + "${LIBRARY_DIR}/lib/share.c" + "${LIBRARY_DIR}/lib/slist.c" + "${LIBRARY_DIR}/lib/smb.c" + "${LIBRARY_DIR}/lib/smtp.c" + "${LIBRARY_DIR}/lib/socketpair.c" "${LIBRARY_DIR}/lib/socks.c" - "${LIBRARY_DIR}/lib/curl_addrinfo.c" "${LIBRARY_DIR}/lib/socks_gssapi.c" "${LIBRARY_DIR}/lib/socks_sspi.c" - "${LIBRARY_DIR}/lib/curl_sspi.c" - "${LIBRARY_DIR}/lib/slist.c" - "${LIBRARY_DIR}/lib/nonblock.c" - "${LIBRARY_DIR}/lib/curl_memrchr.c" - "${LIBRARY_DIR}/lib/imap.c" - "${LIBRARY_DIR}/lib/pop3.c" - "${LIBRARY_DIR}/lib/smtp.c" - "${LIBRARY_DIR}/lib/pingpong.c" - "${LIBRARY_DIR}/lib/rtsp.c" - "${LIBRARY_DIR}/lib/curl_threads.c" - "${LIBRARY_DIR}/lib/warnless.c" - "${LIBRARY_DIR}/lib/hmac.c" - "${LIBRARY_DIR}/lib/curl_rtmp.c" - "${LIBRARY_DIR}/lib/openldap.c" - "${LIBRARY_DIR}/lib/curl_gethostname.c" - "${LIBRARY_DIR}/lib/gopher.c" - "${LIBRARY_DIR}/lib/http_proxy.c" - "${LIBRARY_DIR}/lib/asyn-thread.c" - "${LIBRARY_DIR}/lib/curl_gssapi.c" - "${LIBRARY_DIR}/lib/http_ntlm.c" - "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" - "${LIBRARY_DIR}/lib/curl_ntlm_core.c" - "${LIBRARY_DIR}/lib/curl_sasl.c" - "${LIBRARY_DIR}/lib/rand.c" - "${LIBRARY_DIR}/lib/curl_multibyte.c" - "${LIBRARY_DIR}/lib/conncache.c" - "${LIBRARY_DIR}/lib/cf-h1-proxy.c" - "${LIBRARY_DIR}/lib/http2.c" - "${LIBRARY_DIR}/lib/smb.c" - "${LIBRARY_DIR}/lib/curl_endian.c" - "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/speedcheck.c" + "${LIBRARY_DIR}/lib/splay.c" + "${LIBRARY_DIR}/lib/strcase.c" + "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/strerror.c" + "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/strtoofft.c" "${LIBRARY_DIR}/lib/system_win32.c" - "${LIBRARY_DIR}/lib/mime.c" - "${LIBRARY_DIR}/lib/sha256.c" - "${LIBRARY_DIR}/lib/setopt.c" - "${LIBRARY_DIR}/lib/curl_path.c" - "${LIBRARY_DIR}/lib/curl_range.c" - "${LIBRARY_DIR}/lib/psl.c" - "${LIBRARY_DIR}/lib/doh.c" - "${LIBRARY_DIR}/lib/urlapi.c" - "${LIBRARY_DIR}/lib/curl_get_line.c" - "${LIBRARY_DIR}/lib/altsvc.c" - "${LIBRARY_DIR}/lib/socketpair.c" - "${LIBRARY_DIR}/lib/bufref.c" - "${LIBRARY_DIR}/lib/bufq.c" - "${LIBRARY_DIR}/lib/dynbuf.c" - "${LIBRARY_DIR}/lib/dynhds.c" - "${LIBRARY_DIR}/lib/hsts.c" - "${LIBRARY_DIR}/lib/http_aws_sigv4.c" - "${LIBRARY_DIR}/lib/mqtt.c" - "${LIBRARY_DIR}/lib/rename.c" - "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/telnet.c" "${LIBRARY_DIR}/lib/timediff.c" - "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/timeval.c" + "${LIBRARY_DIR}/lib/transfer.c" + "${LIBRARY_DIR}/lib/url.c" + "${LIBRARY_DIR}/lib/urlapi.c" "${LIBRARY_DIR}/lib/vauth/cleartext.c" "${LIBRARY_DIR}/lib/vauth/cram.c" "${LIBRARY_DIR}/lib/vauth/digest.c" @@ -138,23 +135,24 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/oauth2.c" "${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c" "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" + "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/version.c" "${LIBRARY_DIR}/lib/vquic/vquic.c" - "${LIBRARY_DIR}/lib/vtls/openssl.c" + "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/vssh/libssh2.c" + "${LIBRARY_DIR}/lib/vtls/bearssl.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" - "${LIBRARY_DIR}/lib/vtls/vtls.c" - "${LIBRARY_DIR}/lib/vtls/nss.c" - "${LIBRARY_DIR}/lib/vtls/wolfssl.c" + "${LIBRARY_DIR}/lib/vtls/hostcheck.c" + 
"${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/mbedtls.c" + "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" "${LIBRARY_DIR}/lib/vtls/schannel_verify.c" "${LIBRARY_DIR}/lib/vtls/sectransp.c" - "${LIBRARY_DIR}/lib/vtls/gskit.c" - "${LIBRARY_DIR}/lib/vtls/mbedtls.c" - "${LIBRARY_DIR}/lib/vtls/bearssl.c" - "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/vtls.c" + "${LIBRARY_DIR}/lib/vtls/wolfssl.c" "${LIBRARY_DIR}/lib/vtls/x509asn1.c" - "${LIBRARY_DIR}/lib/vtls/hostcheck.c" - "${LIBRARY_DIR}/lib/vssh/libssh2.c" - "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/warnless.c" ) add_library (_curl ${SRCS}) diff --git a/contrib/idxd-config b/contrib/idxd-config index f6605c41a73..a836ce0e420 160000 --- a/contrib/idxd-config +++ b/contrib/idxd-config @@ -1 +1 @@ -Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99 +Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1 diff --git a/contrib/incbin b/contrib/incbin new file mode 160000 index 00000000000..6e576cae5ab --- /dev/null +++ b/contrib/incbin @@ -0,0 +1 @@ +Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5778cf83c22 --- /dev/null +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") +add_library(_incbin INTERFACE) +target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) +add_library(ch_contrib::incbin ALIAS _incbin) + +# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. +# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." +target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING) diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt index d4d6d648268..10f7d7bad64 100644 --- a/contrib/isa-l-cmake/CMakeLists.txt +++ b/contrib/isa-l-cmake/CMakeLists.txt @@ -1,6 +1,7 @@ option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES}) -if (ARCH_AARCH64) - # Disable ISA-L libray on aarch64. 
+ +# ISA-L is only available for x86-64, so it shall be disabled for other platforms +if (NOT ARCH_AMD64) set (ENABLE_ISAL_LIBRARY OFF) endif () diff --git a/contrib/krb5 b/contrib/krb5 index b56ce6ba690..71b06c22760 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617 +Subproject commit 71b06c2276009ae649c7703019f3b4605f66fd3d diff --git a/contrib/libarchive b/contrib/libarchive new file mode 160000 index 00000000000..ee457961713 --- /dev/null +++ b/contrib/libarchive @@ -0,0 +1 @@ +Subproject commit ee45796171324519f0c0bfd012018dd099296336 diff --git a/contrib/libarchive-cmake/CMakeLists.txt b/contrib/libarchive-cmake/CMakeLists.txt new file mode 100644 index 00000000000..cd5658b7086 --- /dev/null +++ b/contrib/libarchive-cmake/CMakeLists.txt @@ -0,0 +1,182 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive") + +set(SRCS + "${LIBRARY_DIR}/libarchive/archive_acl.c" + "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c" + "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c" + "${LIBRARY_DIR}/libarchive/archive_check_magic.c" + "${LIBRARY_DIR}/libarchive/archive_cmdline.c" + "${LIBRARY_DIR}/libarchive/archive_cryptor.c" + "${LIBRARY_DIR}/libarchive/archive_digest.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c" + "${LIBRARY_DIR}/libarchive/archive_entry.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c" + "${LIBRARY_DIR}/libarchive/archive_entry_sparse.c" + "${LIBRARY_DIR}/libarchive/archive_entry_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_strmode.c" + "${LIBRARY_DIR}/libarchive/archive_entry_xattr.c" + "${LIBRARY_DIR}/libarchive/archive_getdate.c" + "${LIBRARY_DIR}/libarchive/archive_hmac.c" + "${LIBRARY_DIR}/libarchive/archive_match.c" + "${LIBRARY_DIR}/libarchive/archive_options.c" + "${LIBRARY_DIR}/libarchive/archive_pack_dev.c" + "${LIBRARY_DIR}/libarchive/archive_pathmatch.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd7.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd8.c" + "${LIBRARY_DIR}/libarchive/archive_random.c" + "${LIBRARY_DIR}/libarchive/archive_rb.c" + "${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c" + "${LIBRARY_DIR}/libarchive/archive_read_append_filter.c" + "${LIBRARY_DIR}/libarchive/archive_read.c" + "${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract2.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c" + 
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_string.c" + "${LIBRARY_DIR}/libarchive/archive_string_sprintf.c" + "${LIBRARY_DIR}/libarchive/archive_util.c" + "${LIBRARY_DIR}/libarchive/archive_version_details.c" + "${LIBRARY_DIR}/libarchive/archive_virtual.c" + "${LIBRARY_DIR}/libarchive/archive_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_write.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_filename.c" + 
"${LIBRARY_DIR}/libarchive/archive_write_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c" + "${LIBRARY_DIR}/libarchive/filter_fork_posix.c" + "${LIBRARY_DIR}/libarchive/filter_fork_windows.c" + "${LIBRARY_DIR}/libarchive/xxhash.c" +) + +add_library(_libarchive ${SRCS}) +target_include_directories(_libarchive PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + "${LIBRARY_DIR}/libarchive" +) + +target_compile_definitions(_libarchive PUBLIC + HAVE_CONFIG_H +) + +target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier") + +if (TARGET ch_contrib::xz) + target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::xz) +endif() + +if (TARGET ch_contrib::zlib) + target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zlib) +endif() + +if (TARGET ch_contrib::zstd) + target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zstd) +endif() + +if (TARGET ch_contrib::bzip2) + target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2) +endif() + +if (OS_LINUX) + target_compile_definitions( + _libarchive PUBLIC + MAJOR_IN_SYSMACROS=1 + HAVE_LINUX_FS_H=1 + HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1 + HAVE_LINUX_TYPES_H=1 + HAVE_SYS_STATFS_H=1 + HAVE_FUTIMESAT=1 + HAVE_ICONV=1 + ) +endif() + +add_library(ch_contrib::libarchive ALIAS _libarchive) \ No newline at end of file diff --git a/contrib/libarchive-cmake/config.h b/contrib/libarchive-cmake/config.h new file mode 100644 index 00000000000..0b0cab47a52 --- /dev/null +++ b/contrib/libarchive-cmake/config.h @@ -0,0 +1,1391 @@ +/* config.h. Generated from build/cmake/config.h.in by cmake configure */ +#define __LIBARCHIVE_CONFIG_H_INCLUDED 1 + +/* + * Ensure we have C99-style int64_t, etc, all defined. + */ + +/* First, we need to know if the system has already defined them. 
*/ +#define HAVE_INT16_T +#define HAVE_INT32_T +#define HAVE_INT64_T +#define HAVE_INTMAX_T + +#define HAVE_UINT8_T +#define HAVE_UINT16_T +#define HAVE_UINT32_T +#define HAVE_UINT64_T +#define HAVE_UINTMAX_T + +/* We might have the types we want under other spellings. */ +/* #undef HAVE___INT64 */ +/* #undef HAVE_U_INT64_T */ +/* #undef HAVE_UNSIGNED___INT64 */ + +/* The sizes of various standard integer types. */ +#define SIZEOF_SHORT 2 +#define SIZEOF_INT 4 +#define SIZEOF_LONG 8 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF_UNSIGNED_SHORT 2 +#define SIZEOF_UNSIGNED 4 +#define SIZEOF_UNSIGNED_LONG 8 +#define SIZEOF_UNSIGNED_LONG_LONG 8 + +/* + * If we lack int64_t, define it to the first of __int64, int, long, and long long + * that exists and is the right size. + */ +#if !defined(HAVE_INT64_T) && defined(HAVE___INT64) +typedef __int64 int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_INT == 8 +typedef int int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG == 8 +typedef long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG_LONG == 8 +typedef long long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) +#error No 64-bit integer type was found. +#endif + +/* + * Similarly for int32_t + */ +#if !defined(HAVE_INT32_T) && SIZEOF_INT == 4 +typedef int int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) && SIZEOF_LONG == 4 +typedef long int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) +#error No 32-bit integer type was found. +#endif + +/* + * Similarly for int16_t + */ +#if !defined(HAVE_INT16_T) && SIZEOF_INT == 2 +typedef int int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) && SIZEOF_SHORT == 2 +typedef short int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) +#error No 16-bit integer type was found. +#endif + +/* + * Similarly for uint64_t + */ +#if !defined(HAVE_UINT64_T) && defined(HAVE_UNSIGNED___INT64) +typedef unsigned __int64 uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED == 8 +typedef unsigned uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) +#error No 64-bit unsigned integer type was found. +#endif + + +/* + * Similarly for uint32_t + */ +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED == 4 +typedef unsigned uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) +#error No 32-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint16_t + */ +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED == 2 +typedef unsigned uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED_SHORT == 2 +typedef unsigned short uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) +#error No 16-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint8_t + */ +#if !defined(HAVE_UINT8_T) +typedef unsigned char uint8_t; +#define HAVE_UINT8_T +#endif + +#if !defined(HAVE_UINT8_T) +#error No 8-bit unsigned integer type was found. 
+#endif + +/* Define intmax_t and uintmax_t if they are not already defined. */ +#if !defined(HAVE_INTMAX_T) +typedef int64_t intmax_t; +#endif + +#if !defined(HAVE_UINTMAX_T) +typedef uint64_t uintmax_t; +#endif + +/* Define ZLIB_WINAPI if zlib was built on Visual Studio. */ +/* #undef ZLIB_WINAPI */ + +/* Darwin ACL support */ +/* #undef ARCHIVE_ACL_DARWIN */ + +/* FreeBSD ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD */ + +/* FreeBSD NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD_NFS4 */ + +/* Linux POSIX.1e ACL support via libacl */ +/* #undef ARCHIVE_ACL_LIBACL */ + +/* Linux NFSv4 ACL support via librichacl */ +/* #undef ARCHIVE_ACL_LIBRICHACL */ + +/* Solaris ACL support */ +/* #undef ARCHIVE_ACL_SUNOS */ + +/* Solaris NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_SUNOS_NFS4 */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBC */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBSYSTEM */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_MBEDTLS */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_NETTLE */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_OPENSSL */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_WIN */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_LIBC */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_NETTLE */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_MBEDTLS */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBC */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBSYSTEM */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_MBEDTLS */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_NETTLE */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_WIN */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC2 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC3 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBSYSTEM */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_MBEDTLS */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_NETTLE */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_OPENSSL */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_WIN */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC2 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC3 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBSYSTEM supported. 
*/ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBSYSTEM */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_MBEDTLS */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_NETTLE */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_OPENSSL */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_WIN */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC2 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC3 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBSYSTEM */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_MBEDTLS */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_NETTLE */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_OPENSSL */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_WIN */ + +/* AIX xattr support */ +/* #undef ARCHIVE_XATTR_AIX */ + +/* Darwin xattr support */ +/* #undef ARCHIVE_XATTR_DARWIN */ + +/* FreeBSD xattr support */ +/* #undef ARCHIVE_XATTR_FREEBSD */ + +/* Linux xattr support */ +/* #undef ARCHIVE_XATTR_LINUX */ + +/* Version number of bsdcpio */ +#define BSDCPIO_VERSION_STRING "3.7.0" + +/* Version number of bsdtar */ +#define BSDTAR_VERSION_STRING "3.7.0" + +/* Version number of bsdcat */ +#define BSDCAT_VERSION_STRING "3.7.0" + +/* Define to 1 if you have the `acl_create_entry' function. */ +/* #undef HAVE_ACL_CREATE_ENTRY */ + +/* Define to 1 if you have the `acl_get_fd_np' function. */ +/* #undef HAVE_ACL_GET_FD_NP */ + +/* Define to 1 if you have the `acl_get_link' function. */ +/* #undef HAVE_ACL_GET_LINK */ + +/* Define to 1 if you have the `acl_get_link_np' function. */ +/* #undef HAVE_ACL_GET_LINK_NP */ + +/* Define to 1 if you have the `acl_get_perm' function. */ +/* #undef HAVE_ACL_GET_PERM */ + +/* Define to 1 if you have the `acl_get_perm_np' function. */ +/* #undef HAVE_ACL_GET_PERM_NP */ + +/* Define to 1 if you have the `acl_init' function. */ +/* #undef HAVE_ACL_INIT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ACL_LIBACL_H */ + +/* Define to 1 if the system has the type `acl_permset_t'. */ +/* #undef HAVE_ACL_PERMSET_T */ + +/* Define to 1 if you have the `acl_set_fd' function. */ +/* #undef HAVE_ACL_SET_FD */ + +/* Define to 1 if you have the `acl_set_fd_np' function. */ +/* #undef HAVE_ACL_SET_FD_NP */ + +/* Define to 1 if you have the `acl_set_file' function. */ +/* #undef HAVE_ACL_SET_FILE */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +/* #undef HAVE_ARC4RANDOM_BUF */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ATTR_XATTR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BSDXML_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BZLIB_H */ + +/* Define to 1 if you have the `chflags' function. */ +/* #undef HAVE_CHFLAGS */ + +/* Define to 1 if you have the `chown' function. */ +#define HAVE_CHOWN 1 + +/* Define to 1 if you have the `chroot' function. 
*/ +#define HAVE_CHROOT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_COPYFILE_H */ + +/* Define to 1 if you have the `ctime_r' function. */ +#define HAVE_CTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the `cygwin_conv_path' function. */ +/* #undef HAVE_CYGWIN_CONV_PATH */ + +/* Define to 1 if you have the declaration of `ACE_GETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACL */ + +/* Define to 1 if you have the declaration of `ACE_GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACLCNT */ + +/* Define to 1 if you have the declaration of `ACE_SETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_SETACL */ + +/* Define to 1 if you have the declaration of `ACL_SYNCHRONIZE', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_SYNCHRONIZE */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_EXTENDED', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_TYPE_EXTENDED */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_NFS4', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_TYPE_NFS4 */ + +/* Define to 1 if you have the declaration of `ACL_USER', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_USER */ + +/* Define to 1 if you have the declaration of `INT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MAX 1 + +/* Define to 1 if you have the declaration of `INT32_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MIN 1 + +/* Define to 1 if you have the declaration of `INT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MAX 1 + +/* Define to 1 if you have the declaration of `INT64_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MIN 1 + +/* Define to 1 if you have the declaration of `INTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `INTMAX_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MIN 1 + +/* Define to 1 if you have the declaration of `SETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SETACL */ + +/* Define to 1 if you have the declaration of `SIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SIZE_MAX 1 + +/* Define to 1 if you have the declaration of `SSIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SSIZE_MAX 1 + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#define HAVE_DECL_STRERROR_R 1 + +/* Define to 1 if you have the declaration of `UINT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT32_MAX 1 + +/* Define to 1 if you have the declaration of `UINT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT64_MAX 1 + +/* Define to 1 if you have the declaration of `UINTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `XATTR_NOFOLLOW', and to 0 if + you don't. */ +/* #undef HAVE_DECL_XATTR_NOFOLLOW */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DIRECT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the `dirfd' function. */ +#define HAVE_DIRFD 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' 
*/ +/* #undef HAVE_DOPRNT */ + +/* Define to 1 if nl_langinfo supports D_MD_ORDER */ +/* #undef HAVE_D_MD_ORDER */ + +/* A possible errno value for invalid file format errors */ +/* #undef HAVE_EFTYPE */ + +/* A possible errno value for invalid file format errors */ +#define HAVE_EILSEQ 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ERRNO_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXPAT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXT2FS_EXT2_FS_H */ + +/* Define to 1 if you have the `extattr_get_file' function. */ +/* #undef HAVE_EXTATTR_GET_FILE */ + +/* Define to 1 if you have the `extattr_list_file' function. */ +/* #undef HAVE_EXTATTR_LIST_FILE */ + +/* Define to 1 if you have the `extattr_set_fd' function. */ +/* #undef HAVE_EXTATTR_SET_FD */ + +/* Define to 1 if you have the `extattr_set_file' function. */ +/* #undef HAVE_EXTATTR_SET_FILE */ + +/* Define to 1 if EXTATTR_NAMESPACE_USER is defined in sys/extattr.h. */ +/* #undef HAVE_DECL_EXTATTR_NAMESPACE_USER */ + +/* Define to 1 if you have the declaration of `GETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_GETACL */ + +/* Define to 1 if you have the declaration of `GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_GETACLCNT */ + +/* Define to 1 if you have the `fchdir' function. */ +#define HAVE_FCHDIR 1 + +/* Define to 1 if you have the `fchflags' function. */ +/* #undef HAVE_FCHFLAGS */ + +/* Define to 1 if you have the `fchmod' function. */ +#define HAVE_FCHMOD 1 + +/* Define to 1 if you have the `fchown' function. */ +#define HAVE_FCHOWN 1 + +/* Define to 1 if you have the `fcntl' function. */ +#define HAVE_FCNTL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `fdopendir' function. */ +#define HAVE_FDOPENDIR 1 + +/* Define to 1 if you have the `fgetea' function. */ +/* #undef HAVE_FGETEA */ + +/* Define to 1 if you have the `fgetxattr' function. */ +/* #undef HAVE_FGETXATTR */ + +/* Define to 1 if you have the `flistea' function. */ +/* #undef HAVE_FLISTEA */ + +/* Define to 1 if you have the `flistxattr' function. */ +#define HAVE_FLISTXATTR 1 + +/* Define to 1 if you have the `fnmatch' function. */ +#define HAVE_FNMATCH 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FNMATCH_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ +#define HAVE_FSEEKO 1 + +/* Define to 1 if you have the `fsetea' function. */ +/* #undef HAVE_FSETEA */ + +/* Define to 1 if you have the `fsetxattr' function. */ +/* #undef HAVE_FSETXATTR */ + +/* Define to 1 if you have the `fstat' function. */ +#define HAVE_FSTAT 1 + +/* Define to 1 if you have the `fstatat' function. */ +#define HAVE_FSTATAT 1 + +/* Define to 1 if you have the `fstatfs' function. */ +#define HAVE_FSTATFS 1 + +/* Define to 1 if you have the `fstatvfs' function. */ +#define HAVE_FSTATVFS 1 + +/* Define to 1 if you have the `ftruncate' function. */ +#define HAVE_FTRUNCATE 1 + +/* Define to 1 if you have the `futimens' function. */ +#define HAVE_FUTIMENS 1 + +/* Define to 1 if you have the `futimes' function. */ +#define HAVE_FUTIMES 1 + +/* Define to 1 if you have the `futimesat' function. */ +/* #undef HAVE_FUTIMESAT */ + +/* Define to 1 if you have the `getea' function. */ +/* #undef HAVE_GETEA */ + +/* Define to 1 if you have the `geteuid' function. 
*/ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getgrgid_r' function. */ +#define HAVE_GETGRGID_R 1 + +/* Define to 1 if you have the `getgrnam_r' function. */ +#define HAVE_GETGRNAM_R 1 + +/* Define to 1 if platform uses `optreset` to reset `getopt` */ +#define HAVE_GETOPT_OPTRESET 1 + +/* Define to 1 if you have the `getpid' function. */ +#define HAVE_GETPID 1 + +/* Define to 1 if you have the `getpwnam_r' function. */ +#define HAVE_GETPWNAM_R 1 + +/* Define to 1 if you have the `getpwuid_r' function. */ +#define HAVE_GETPWUID_R 1 + +/* Define to 1 if you have the `getvfsbyname' function. */ +/* #undef HAVE_GETVFSBYNAME */ + +/* Define to 1 if you have the `getxattr' function. */ +#define HAVE_GETXATTR 1 + +/* Define to 1 if you have the `gmtime_r' function. */ +#define HAVE_GMTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the `iconv' function. */ +/* #undef HAVE_ICONV */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ICONV_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LANGINFO_H 1 + +/* Define to 1 if you have the `lchflags' function. */ +/* #undef HAVE_LCHFLAGS */ + +/* Define to 1 if you have the `lchmod' function. */ +#define HAVE_LCHMOD 1 + +/* Define to 1 if you have the `lchown' function. */ +#define HAVE_LCHOWN 1 + +/* Define to 1 if you have the `lgetea' function. */ +/* #undef HAVE_LGETEA */ + +/* Define to 1 if you have the `lgetxattr' function. */ +#define HAVE_LGETXATTR 1 + +/* Define to 1 if you have the `acl' library (-lacl). */ +/* #undef HAVE_LIBACL */ + +/* Define to 1 if you have the `attr' library (-lattr). */ +/* #undef HAVE_LIBATTR */ + +/* Define to 1 if you have the `bsdxml' library (-lbsdxml). */ +/* #undef HAVE_LIBBSDXML */ + +/* Define to 1 if you have the `bz2' library (-lbz2). */ +/* #undef HAVE_LIBBZ2 */ + +/* Define to 1 if you have the `b2' library (-lb2). */ +/* #undef HAVE_LIBB2 */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BLAKE2_H */ + +/* Define to 1 if you have the `charset' library (-lcharset). */ +/* #undef HAVE_LIBCHARSET */ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +/* #undef HAVE_LIBCRYPTO */ + +/* Define to 1 if you have the `expat' library (-lexpat). */ +/* #undef HAVE_LIBEXPAT */ + +/* Define to 1 if you have the `gcc' library (-lgcc). */ +/* #undef HAVE_LIBGCC */ + +/* Define to 1 if you have the `lz4' library (-llz4). */ +/* #undef HAVE_LIBLZ4 */ + +/* Define to 1 if you have the `lzma' library (-llzma). */ +/* #undef HAVE_LIBLZMA */ + +/* Define to 1 if you have the `lzmadec' library (-llzmadec). */ +/* #undef HAVE_LIBLZMADEC */ + +/* Define to 1 if you have the `lzo2' library (-llzo2). */ +/* #undef HAVE_LIBLZO2 */ + +/* Define to 1 if you have the `mbedcrypto' library (-lmbedcrypto). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `nettle' library (-lnettle). */ +/* #undef HAVE_LIBNETTLE */ + +/* Define to 1 if you have the `pcre' library (-lpcre). */ +/* #undef HAVE_LIBPCRE */ + +/* Define to 1 if you have the `pcreposix' library (-lpcreposix). */ +/* #undef HAVE_LIBPCREPOSIX */ + +/* Define to 1 if you have the `xml2' library (-lxml2). */ +#define HAVE_LIBXML2 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBXML_XMLREADER_H */ + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_LIBXML_XMLWRITER_H */ + +/* Define to 1 if you have the `z' library (-lz). */ +/* #undef HAVE_LIBZ */ + +/* Define to 1 if you have the `zstd' library (-lzstd). */ +/* #undef HAVE_LIBZSTD */ + +/* Define to 1 if you have the `zstd' library (-lzstd) with compression + support. */ +/* #undef HAVE_LIBZSTD_COMPRESSOR */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `link' function. */ +#define HAVE_LINK 1 + +/* Define to 1 if you have the `linkat' function. */ +#define HAVE_LINKAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FIEMAP_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_MAGIC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_TYPES_H */ + +/* Define to 1 if you have the `listea' function. */ +/* #undef HAVE_LISTEA */ + +/* Define to 1 if you have the `listxattr' function. */ +#define HAVE_LISTXATTR 1 + +/* Define to 1 if you have the `llistea' function. */ +/* #undef HAVE_LLISTEA */ + +/* Define to 1 if you have the `llistxattr' function. */ +#define HAVE_LLISTXATTR 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LOCALCHARSET_H */ + +/* Define to 1 if you have the `locale_charset' function. */ +/* #undef HAVE_LOCALE_CHARSET */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LOCALE_H 1 + +/* Define to 1 if you have the `localtime_r' function. */ +#define HAVE_LOCALTIME_R 1 + +/* Define to 1 if the system has the type `long long int'. */ +/* #undef HAVE_LONG_LONG_INT */ + +/* Define to 1 if you have the `lsetea' function. */ +/* #undef HAVE_LSETEA */ + +/* Define to 1 if you have the `lsetxattr' function. */ +#define HAVE_LSETXATTR 1 + +/* Define to 1 if you have the `lstat' function. */ +#define HAVE_LSTAT 1 + +/* Define to 1 if `lstat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_LSTAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the `lutimes' function. */ +#define HAVE_LUTIMES 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4HC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMADEC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMA_H */ + +/* Define to 1 if you have a working `lzma_stream_encoder_mt' function. */ +/* #undef HAVE_LZMA_STREAM_ENCODER_MT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZO1X_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZOCONF_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_MD_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_PKCS5_H */ + +/* Define to 1 if you have the `mbrtowc' function. */ +/* #undef HAVE_MBRTOWC */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MEMBERSHIP_H */ + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mkdir' function. */ +#define HAVE_MKDIR 1 + +/* Define to 1 if you have the `mkfifo' function. */ +#define HAVE_MKFIFO 1 + +/* Define to 1 if you have the `mknod' function. 
*/ +#define HAVE_MKNOD 1 + +/* Define to 1 if you have the `mkstemp' function. */ +#define HAVE_MKSTEMP 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_HMAC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_MD5_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_PBKDF2_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_RIPEMD160_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_SHA_H */ + +/* Define to 1 if you have the `nl_langinfo' function. */ +/* #undef HAVE_NL_LANGINFO */ + +/* Define to 1 if you have the `openat' function. */ +#define HAVE_OPENAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_OPENSSL_EVP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PATHS_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PCREPOSIX_H */ + +/* Define to 1 if you have the `pipe' function. */ +#define HAVE_PIPE 1 + +/* Define to 1 if you have the `PKCS5_PBKDF2_HMAC_SHA1' function. */ +/* #undef HAVE_PKCS5_PBKDF2_HMAC_SHA1 */ + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POLL_H 1 + +/* Define to 1 if you have the `posix_spawnp' function. */ +#define HAVE_POSIX_SPAWNP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PROCESS_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `readdir_r' function. */ +#define HAVE_READDIR_R 1 + +/* Define to 1 if you have the `readlink' function. */ +#define HAVE_READLINK 1 + +/* Define to 1 if you have the `readlinkat' function. */ +#define HAVE_READLINKAT 1 + +/* Define to 1 if you have the `readpassphrase' function. */ +/* #undef HAVE_READPASSPHRASE */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_READPASSPHRASE_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_REGEX_H 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `setenv' function. */ +#define HAVE_SETENV 1 + +/* Define to 1 if you have the `setlocale' function. */ +#define HAVE_SETLOCALE 1 + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SPAWN_H 1 + +/* Define to 1 if you have the `statfs' function. */ +#define HAVE_STATFS 1 + +/* Define to 1 if you have the `statvfs' function. */ +#define HAVE_STATVFS 1 + +/* Define to 1 if `stat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_STAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strchr' function. */ +#define HAVE_STRCHR 1 + +/* Define to 1 if you have the `strnlen' function. */ +#define HAVE_STRNLEN 1 + +/* Define to 1 if you have the `strdup' function. 
*/ +#define HAVE_STRDUP 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the `strerror_r' function. */ +#define HAVE_STRERROR_R 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strrchr' function. */ +#define HAVE_STRRCHR 1 + +/* Define to 1 if `f_namemax' is a member of `struct statfs'. */ +/* #undef HAVE_STRUCT_STATFS_F_NAMEMAX */ + +/* Define to 1 if `f_iosize' is a member of `struct statvfs'. */ +/* #undef HAVE_STRUCT_STATVFS_F_IOSIZE */ + +/* Define to 1 if `st_birthtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIME */ + +/* Define to 1 if `st_birthtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_blksize' is a member of `struct stat'. */ +#define HAVE_STRUCT_STAT_ST_BLKSIZE 1 + +/* Define to 1 if `st_flags' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_FLAGS */ + +/* Define to 1 if `st_mtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_mtime_n' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_N */ + +/* Define to 1 if `st_mtime_usec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_USEC */ + +/* Define to 1 if `st_mtim.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC */ + +/* Define to 1 if `st_umtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_UMTIME */ + +/* Define to 1 if `tm_gmtoff' is a member of `struct tm'. */ +#define HAVE_STRUCT_TM_TM_GMTOFF 1 + +/* Define to 1 if `__tm_gmtoff' is a member of `struct tm'. */ +/* #undef HAVE_STRUCT_TM___TM_GMTOFF */ + +/* Define to 1 if you have `struct vfsconf'. */ +/* #undef HAVE_STRUCT_VFSCONF */ + +/* Define to 1 if you have `struct xvfsconf'. */ +/* #undef HAVE_STRUCT_XVFSCONF */ + +/* Define to 1 if you have the `symlink' function. */ +#define HAVE_SYMLINK 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_ACL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_CDEFS_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EA_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EXTATTR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MKDEV_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MOUNT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_POLL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_QUEUE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_RICHACL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_STATFS_H */ + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_SYS_STATVFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SYSMACROS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_UTSNAME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_VFS_H 1 + +/* Define to 1 if you have that is POSIX.1 compatible. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_XATTR_H */ + +/* Define to 1 if you have the `timegm' function. */ +#define HAVE_TIMEGM 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the `tzset' function. */ +#define HAVE_TZSET 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `unlinkat' function. */ +#define HAVE_UNLINKAT 1 + +/* Define to 1 if you have the `unsetenv' function. */ +#define HAVE_UNSETENV 1 + +/* Define to 1 if the system has the type `unsigned long long'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG */ + +/* Define to 1 if the system has the type `unsigned long long int'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG_INT */ + +/* Define to 1 if you have the `utime' function. */ +#define HAVE_UTIME 1 + +/* Define to 1 if you have the `utimensat' function. */ +#define HAVE_UTIMENSAT 1 + +/* Define to 1 if you have the `utimes' function. */ +#define HAVE_UTIMES 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UTIME_H 1 + +/* Define to 1 if you have the `vfork' function. */ +#define HAVE_VFORK 1 + +/* Define to 1 if you have the `vprintf' function. */ +#define HAVE_VPRINTF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCHAR_H 1 + +/* Define to 1 if the system has the type `wchar_t'. */ +#define HAVE_WCHAR_T 1 + +/* Define to 1 if you have the `wcrtomb' function. */ +#define HAVE_WCRTOMB 1 + +/* Define to 1 if you have the `wcscmp' function. */ +#define HAVE_WCSCMP 1 + +/* Define to 1 if you have the `wcscpy' function. */ +#define HAVE_WCSCPY 1 + +/* Define to 1 if you have the `wcslen' function. */ +#define HAVE_WCSLEN 1 + +/* Define to 1 if you have the `wctomb' function. */ +#define HAVE_WCTOMB 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCTYPE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINDOWS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINIOCTL_H */ + +/* Define to 1 if you have _CrtSetReportMode in */ +/* #undef HAVE__CrtSetReportMode */ + +/* Define to 1 if you have the `wmemcmp' function. */ +#define HAVE_WMEMCMP 1 + +/* Define to 1 if you have the `wmemcpy' function. */ +#define HAVE_WMEMCPY 1 + +/* Define to 1 if you have the `wmemmove' function. */ +#define HAVE_WMEMMOVE 1 + +/* Define to 1 if you have a working EXT2_IOC_GETFLAGS */ +/* #undef HAVE_WORKING_EXT2_IOC_GETFLAGS */ + +/* Define to 1 if you have a working FS_IOC_GETFLAGS */ +#define HAVE_WORKING_FS_IOC_GETFLAGS 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZSTD_H */ + +/* Define to 1 if you have the `ctime_s' function. 
*/ +/* #undef HAVE_CTIME_S */ + +/* Define to 1 if you have the `_fseeki64' function. */ +/* #undef HAVE__FSEEKI64 */ + +/* Define to 1 if you have the `_get_timezone' function. */ +/* #undef HAVE__GET_TIMEZONE */ + +/* Define to 1 if you have the `gmtime_s' function. */ +/* #undef HAVE_GMTIME_S */ + +/* Define to 1 if you have the `localtime_s' function. */ +/* #undef HAVE_LOCALTIME_S */ + +/* Define to 1 if you have the `_mkgmtime' function. */ +/* #undef HAVE__MKGMTIME */ + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Version number of libarchive as a single integer */ +#define LIBARCHIVE_VERSION_NUMBER "3007000" + +/* Version number of libarchive */ +#define LIBARCHIVE_VERSION_STRING "3.7.0" + +/* Define to 1 if `lstat' dereferences a symlink specified with a trailing + slash. */ +/* #undef LSTAT_FOLLOWS_SLASHED_SYMLINK */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in . + */ +/* #undef MAJOR_IN_MKDEV */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in + . */ +/* #undef MAJOR_IN_SYSMACROS */ + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +/* #undef NO_MINUS_C_MINUS_O */ + +/* The size of `wchar_t', as computed by sizeof. */ +#define SIZEOF_WCHAR_T 4 + +/* Define to 1 if strerror_r returns char *. */ +/* #undef STRERROR_R_CHAR_P */ + +/* Define to 1 if you can safely include both and . */ +/* #undef TIME_WITH_SYS_TIME */ + +/* + * Some platform requires a macro to use extension functions. + */ +#define SAFE_TO_DEFINE_EXTENSIONS 1 +#ifdef SAFE_TO_DEFINE_EXTENSIONS +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +#endif /* SAFE_TO_DEFINE_EXTENSIONS */ + +/* Version number of package */ +#define VERSION "3.7.0" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ +/* #undef _LARGEFILE_SOURCE */ + +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ + +/* Define to control Windows SDK version */ +#ifndef NTDDI_VERSION +/* #undef NTDDI_VERSION */ +#endif // NTDDI_VERSION + +#ifndef _WIN32_WINNT +/* #undef _WIN32_WINNT */ +#endif // _WIN32_WINNT + +#ifndef WINVER +/* #undef WINVER */ +#endif // WINVER + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `int' if doesn't define. */ +/* #undef gid_t */ + +/* Define to `unsigned long' if does not define. */ +/* #undef id_t */ + +/* Define to `int' if does not define. */ +/* #undef mode_t */ + +/* Define to `long long' if does not define. */ +/* #undef off_t */ + +/* Define to `int' if doesn't define. */ +/* #undef pid_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to `int' if does not define. */ +/* #undef ssize_t */ + +/* Define to `int' if doesn't define. */ +/* #undef uid_t */ + +/* Define to `int' if does not define. */ +/* #undef intptr_t */ + +/* Define to `unsigned int' if does not define. 
*/ +/* #undef uintptr_t */ diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h index bc00e5a286b..9e83d11cb7c 100644 --- a/contrib/libmetrohash/src/platform.h +++ b/contrib/libmetrohash/src/platform.h @@ -17,7 +17,8 @@ #ifndef METROHASH_PLATFORM_H #define METROHASH_PLATFORM_H -#include +#include +#include #include // rotate right idiom recognized by most compilers @@ -33,6 +34,11 @@ inline static uint64_t read_u64(const void * const ptr) // so we use memcpy() which is the most portable. clang & gcc usually translates `memcpy()` into a single `load` instruction // when hardware supports it, so using memcpy() is efficient too. memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -40,6 +46,11 @@ inline static uint64_t read_u32(const void * const ptr) { uint32_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -47,6 +58,11 @@ inline static uint64_t read_u16(const void * const ptr) { uint16_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } diff --git a/contrib/libpqxx b/contrib/libpqxx index bdd6540fb95..791d68fd899 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d +Subproject commit 791d68fd89902835133c50435e380ec7a73271b7 diff --git a/contrib/libunwind b/contrib/libunwind index e48aa13f67d..30cc1d3fd36 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5 +Subproject commit 30cc1d3fd3655a5cfa0ab112fe320fb9fc0a8344 diff --git a/contrib/llvm-project b/contrib/llvm-project index d857c707fcc..e7b8befca85 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9 +Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0 diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index fe6cffd33e2..00992f4f792 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,18 +1,16 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") +if (APPLE OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) endif() -option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) +option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) if (NOT ENABLE_EMBEDDED_COMPILER) message(STATUS "Not using LLVM") return() endif() -# TODO: Enable compilation on AArch64 - set (LLVM_VERSION "15.0.0bundled") set (LLVM_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include" @@ -58,18 +56,30 @@ set (REQUIRED_LLVM_LIBRARIES LLVMDemangle ) -# if (ARCH_AMD64) +if (ARCH_AMD64) + set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -# elseif (ARCH_AARCH64) -# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -# endif () +elseif (ARCH_AARCH64) + set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES 
LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) +elseif (ARCH_PPC64LE) + set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) +elseif (ARCH_S390X) + set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) +elseif (ARCH_RISCV64) + set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) +endif () + +message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}") set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "") set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "") -set (LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "") # for x86 + ARM: "X86;AArch64" # Omit unnecessary stuff (just the options which are ON by default) set(LLVM_ENABLE_BACKTRACES 0 CACHE INTERNAL "") @@ -99,15 +109,12 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "") set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm") set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") -# Since we always use toolchain files to generate hermatic builds, cmake will -# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM -# targets with all tests enabled, which will slow down cmake configuration and -# compilation (You'll see Building native llvm-tblgen...). Let's disable the -# cross compiling indicator for now. -# -# TODO We should let cmake know whether it's indeed a cross compilation in the -# first place. 
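
The libmetrohash change above makes unaligned reads endianness-aware so MetroHash produces identical results on big-endian platforms such as s390x. A minimal standalone C++ sketch of that pattern follows (assuming a C++23 compiler that provides std::byteswap in <bit> and the GCC/Clang __BYTE_ORDER__ predefined macros; read_u64_le is an illustrative name, not part of the patch):

```cpp
#include <bit>
#include <cstdint>
#include <cstring>

// Read a little-endian 64-bit value from possibly unaligned memory.
// memcpy avoids undefined behaviour from unaligned loads; on big-endian
// hosts (e.g. s390x) the bytes are swapped so the result matches the
// little-endian interpretation the hash expects.
inline std::uint64_t read_u64_le(const void * ptr)
{
    std::uint64_t result;
    std::memcpy(&result, ptr, sizeof(result));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    result = std::byteswap(result);
#endif
    return result;
}

int main()
{
    const unsigned char bytes[8] = {1, 0, 0, 0, 0, 0, 0, 0};
    return read_u64_le(bytes) == 1 ? 0 : 1;  // 0 on both endiannesses
}
```
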
-set (CMAKE_CROSSCOMPILING 0) +message (STATUS "LLVM CMAKE CROSS COMPILING ${CMAKE_CROSSCOMPILING}") +if (CMAKE_CROSSCOMPILING) + set (LLVM_HOST_TRIPLE "${CMAKE_C_COMPILER_TARGET}" CACHE INTERNAL "") + message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}") +endif() + add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") set_directory_properties (PROPERTIES diff --git a/contrib/lz4 b/contrib/lz4 index e82198428c8..92ebf1870b9 160000 --- a/contrib/lz4 +++ b/contrib/lz4 @@ -1 +1 @@ -Subproject commit e82198428c8061372d5adef1f9bfff4203f6081e +Subproject commit 92ebf1870b9acbefc0e7970409a181954a10ff40 diff --git a/contrib/lz4-cmake/CMakeLists.txt b/contrib/lz4-cmake/CMakeLists.txt index c0fd574134f..0f37022d515 100644 --- a/contrib/lz4-cmake/CMakeLists.txt +++ b/contrib/lz4-cmake/CMakeLists.txt @@ -13,6 +13,11 @@ add_library (ch_contrib::lz4 ALIAS _lz4) target_compile_definitions (_lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1) target_compile_definitions (_lz4 PUBLIC LZ4_FAST_DEC_LOOP=1) + +if(ARCH_S390X) + target_compile_definitions(_lz4 PRIVATE LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT) +endif() + if (SANITIZE STREQUAL "undefined") target_compile_options (_lz4 PRIVATE -fno-sanitize=undefined) endif () diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt deleted file mode 100644 index 5380269c479..00000000000 --- a/contrib/nlp-data-cmake/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") - -add_library (_nlp_data INTERFACE) - -clickhouse_embed_binaries( - TARGET nlp_dictionaries - RESOURCE_DIR "${LIBRARY_DIR}" - RESOURCES charset.zst tonality_ru.zst programming.zst -) - -add_dependencies(_nlp_data nlp_dictionaries) -target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -add_library(ch_contrib::nlp_data ALIAS _nlp_data) diff --git a/contrib/openldap b/contrib/openldap index 8688afe6bc9..5671b80e369 160000 --- a/contrib/openldap +++ b/contrib/openldap @@ -1 +1 @@ -Subproject commit 8688afe6bc95ebcd20edf4578c536362218cb70a +Subproject commit 5671b80e369df2caf5f34e02924316205a43c895 diff --git a/contrib/openldap-cmake/CMakeLists.txt b/contrib/openldap-cmake/CMakeLists.txt index 7af07d5f553..c7d4b5a2ca2 100644 --- a/contrib/openldap-cmake/CMakeLists.txt +++ b/contrib/openldap-cmake/CMakeLists.txt @@ -96,71 +96,82 @@ target_compile_definitions(_lber ) set(_ldap_srcs - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/abandon.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c" 
+ "${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/avl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cancel.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/filter.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/free.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getdn.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getentry.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getvalues.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/init.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/lbase64.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c" + 
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/msctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/psearchctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/rdwr.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/rq.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tavl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_debug.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_nt.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_posix.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_pth.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_thr.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/threads.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tpool.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c" ) mkversion(ldap) @@ -185,43 +196,5 @@ target_compile_definitions(_ldap PRIVATE LDAP_LIBRARY ) -set(_ldap_r_specific_srcs - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/threads.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rdwr.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/tpool.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rq.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_posix.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_thr.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_nt.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_pth.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_stub.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_debug.c" -) - -mkversion(ldap_r) - -add_library(_ldap_r - ${_ldap_r_specific_srcs} - ${_ldap_srcs} - "${CMAKE_CURRENT_BINARY_DIR}/ldap_r-version.c" -) - -target_link_libraries(_ldap_r - PRIVATE _lber - PRIVATE OpenSSL::Crypto OpenSSL::SSL -) - -target_include_directories(_ldap_r SYSTEM - PUBLIC ${_extra_build_dir}/include - PUBLIC "${OPENLDAP_SOURCE_DIR}/include" - PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r" - PRIVATE 
"${OPENLDAP_SOURCE_DIR}/libraries/libldap" -) - -target_compile_definitions(_ldap_r - PRIVATE LDAP_R_COMPILE - PRIVATE LDAP_LIBRARY -) - -add_library(ch_contrib::ldap ALIAS _ldap_r) +add_library(ch_contrib::ldap ALIAS _ldap) add_library(ch_contrib::lber ALIAS _lber) diff --git a/contrib/openssl b/contrib/openssl index 19cc035b6c6..245cb0291e0 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 19cc035b6c6f2283573d29c7ea7f7d675cf750ce +Subproject commit 245cb0291e0db99d9ccf3692fa76f440b2b054c2 diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index 92739ff3608..980a12e0365 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -126,7 +126,7 @@ if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL) elseif(ARCH_PPC64LE) macro(perl_generate_asm FILE_IN FILE_OUT) add_custom_command(OUTPUT ${FILE_OUT} - COMMAND /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) + COMMAND /usr/bin/env perl ${FILE_IN} "linux64v2" ${FILE_OUT}) endmacro() perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-ppc.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-ppc.s) diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. * Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING 
*OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! 
- * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. * Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. 
*/ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/orc b/contrib/orc index 568d1d60c25..a20d1d9d7ad 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1 +Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6 diff --git a/contrib/qpl b/contrib/qpl index 3f8f5cea277..faaf1935045 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679 +Subproject commit faaf19350459c076e66bb5df11743c3fade59b73 diff --git a/contrib/re2 b/contrib/re2 index 03da4fc0857..a807e8a3aac 160000 --- a/contrib/re2 +++ b/contrib/re2 @@ -1 +1 @@ -Subproject commit 03da4fc0857c285e3a26782f6bc8931c4c950df4 +Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c diff --git a/contrib/re2-cmake/CMakeLists.txt b/contrib/re2-cmake/CMakeLists.txt index 305c2400c77..408e23725fd 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -32,7 +32,6 @@ set(RE2_SOURCES ${SRC_DIR}/re2/tostring.cc ${SRC_DIR}/re2/unicode_casefold.cc ${SRC_DIR}/re2/unicode_groups.cc - ${SRC_DIR}/util/pcre.cc ${SRC_DIR}/util/rune.cc ${SRC_DIR}/util/strutil.cc ) diff --git a/contrib/robin-map b/contrib/robin-map new file mode 160000 index 00000000000..851a59e0e30 --- /dev/null +++ b/contrib/robin-map @@ -0,0 +1 @@ +Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d diff --git a/contrib/robin-map-cmake/CMakeLists.txt b/contrib/robin-map-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f82ad705dcc --- /dev/null +++ b/contrib/robin-map-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/snappy b/contrib/snappy index fb057edfed8..6ebb5b1ab88 160000 --- a/contrib/snappy +++ b/contrib/snappy @@ -1 +1 @@ -Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf +Subproject commit 6ebb5b1ab8801ea3fde103c5c29f5ab86df5fe7a diff --git a/contrib/sparse-checkout/update-boost.sh b/contrib/sparse-checkout/update-boost.sh index 9bd1f6c1796..04a5c0a1f6c 100755 --- a/contrib/sparse-checkout/update-boost.sh +++ b/contrib/sparse-checkout/update-boost.sh @@ -20,6 +20,7 @@ echo 
'/boost/context/*' >> $FILES_TO_CHECKOUT echo '/boost/convert/*' >> $FILES_TO_CHECKOUT echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT echo '/boost/core/*' >> $FILES_TO_CHECKOUT +echo '/boost/describe/*' >> $FILES_TO_CHECKOUT echo '/boost/detail/*' >> $FILES_TO_CHECKOUT echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT echo '/boost/exception/*' >> $FILES_TO_CHECKOUT @@ -82,4 +83,4 @@ echo '/libs/*' >> $FILES_TO_CHECKOUT git config core.sparsecheckout true git checkout $1 -git read-tree -mu HEAD \ No newline at end of file +git read-tree -mu HEAD diff --git a/contrib/usearch b/contrib/usearch new file mode 160000 index 00000000000..f942b6f334b --- /dev/null +++ b/contrib/usearch @@ -0,0 +1 @@ +Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt new file mode 100644 index 00000000000..29fbe57106c --- /dev/null +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -0,0 +1,17 @@ +set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") +set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include") + +set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16") +set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map") +set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD") + +add_library(_usearch INTERFACE) + +target_include_directories(_usearch SYSTEM INTERFACE + ${FP16_PROJECT_DIR}/include + ${ROBIN_MAP_PROJECT_DIR}/include + ${SIMSIMD_PROJECT_DIR}/include + ${USEARCH_SOURCE_DIR}) + +add_library(ch_contrib::usearch ALIAS _usearch) +target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH) diff --git a/docker/README.md b/docker/README.md index ec52ddd143e..c1bb3b49f00 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,5 +1,5 @@ ## ClickHouse Dockerfiles -This directory contain Dockerfiles for `clickhouse-client` and `clickhouse-server`. They are updated in each release. +This directory contains Dockerfiles for `clickhouse-server`. They are updated in each release. -Also there is bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. +Also, there is a bunch of images for testing and CI. They are listed in the `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`.
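Note: the README hunk above tells contributors to register any new CI/test image in `docker/images.json`. A minimal sketch of how such a registration could be looked up, assuming only the entry shape visible in the `docker/images.json` hunk below (a directory key mapping to `{"name": ..., "dependent": [...]}`); the script and helper names here are illustrative and not part of the patch:

```python
#!/usr/bin/env python3
"""Illustrative only (not part of the patch): look up a docker/ directory in
docker/images.json, whose entries have the shape
{"docker/test/sqltest": {"name": "clickhouse/sqltest", "dependent": []}}."""
import json
import sys
from pathlib import Path


def image_entry(repo_root: Path, docker_path: str) -> dict:
    # docker_path is the registry key, e.g. "docker/test/sqltest"; the value
    # names the published image and lists images that build on top of it.
    registry = json.loads((repo_root / "docker" / "images.json").read_text())
    return registry.get(docker_path, {})


if __name__ == "__main__":
    root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    entry = image_entry(root, "docker/test/sqltest")
    print(entry.get("name", "<not registered>"))
```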
diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile deleted file mode 100644 index 1c185daec75..00000000000 --- a/docker/client/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM ubuntu:18.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=22.1.1.* - -RUN apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && apt-get update \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --allow-unauthenticated --yes --no-install-recommends \ - clickhouse-client=$version \ - clickhouse-common-static=$version \ - locales \ - tzdata \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf \ - && apt-get clean - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -ENTRYPOINT ["/usr/bin/clickhouse-client"] diff --git a/docker/client/README.md b/docker/client/README.md deleted file mode 100644 index bbcc7d60794..00000000000 --- a/docker/client/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# ClickHouse Client Docker Image - -For more information see [ClickHouse Server Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/). - -## License - -View [license information](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) for the software contained in this image. diff --git a/docker/images.json b/docker/images.json index e8fc329a640..d895e2da2f0 100644 --- a/docker/images.json +++ b/docker/images.json @@ -125,6 +125,7 @@ "docker/test/keeper-jepsen", "docker/test/server-jepsen", "docker/test/sqllogic", + "docker/test/sqltest", "docker/test/stateless" ] }, @@ -155,11 +156,18 @@ }, "docker/docs/builder": { "name": "clickhouse/docs-builder", - "dependent": [ - ] + "dependent": [] }, "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] + }, + "docker/test/integration/nginx_dav": { + "name": "clickhouse/nginx-dav", + "dependent": [] } } diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 8a6324aef88..63dbac6a995 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.2.18" +ARG VERSION="23.8.2.7" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/packager/README.md b/docker/packager/README.md index a78feb8d7fc..3a91f9a63f0 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -6,7 +6,7 @@ Usage: Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 99e748c41d4..86cce4159ac 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -52,38 +52,15 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ rustup toolchain install nightly-2023-07-04 && \ rustup default nightly-2023-07-04 && \ rustup component add rust-src && \ + rustup target add x86_64-unknown-linux-gnu && \ rustup target add aarch64-unknown-linux-gnu && \ rustup target add x86_64-apple-darwin && \ rustup target add x86_64-unknown-freebsd && \ rustup target add aarch64-apple-darwin && \ - rustup target add powerpc64le-unknown-linux-gnu - -# Create vendor cache for cargo. -# -# Note, that the config.toml for the root is used, you will not be able to -# install any other crates, except those which had been vendored (since if -# there is "replace-with" for some source, then cargo will not look to other -# remotes except this). -# -# Notes for the command itself: -# - --chown is required to preserve the rights -# - unstable-options for -C -# - chmod is required to fix the permissions, since builds are running from a different user -# - copy of the Cargo.lock is required for proper dependencies versions -# - cargo vendor --sync is requried to overcome [1] bug. 
-# -# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23 -COPY --chown=root:root /rust /rust/packages -RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \ - cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \ - cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \ - rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \ - sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \ - cat $CARGO_HOME/config.toml && \ - mv /rust/packages/vendor /rust/vendor && \ - chmod -R o=r+X /rust/vendor && \ - ls -R -l /rust/packages && \ - rm -r /rust/packages + rustup target add powerpc64le-unknown-linux-gnu && \ + rustup target add x86_64-unknown-linux-musl && \ + rustup target add aarch64-unknown-linux-musl && \ + rustup target add riscv64gc-unknown-linux-gnu # NOTE: Seems like gcc-11 is too new for ubuntu20 repository # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): @@ -107,6 +84,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # Download toolchain and SDK for Darwin RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz +# Download and install mold 2.0 for s390x build +RUN curl -Lo /tmp/mold.tar.gz "https://github.com/rui314/mold/releases/download/v2.0.0/mold-2.0.0-x86_64-linux.tar.gz" \ + && mkdir /tmp/mold \ + && tar -xzf /tmp/mold.tar.gz -C /tmp/mold \ + && cp -r /tmp/mold/mold*/* /usr \ + && rm -rf /tmp/mold \ + && rm /tmp/mold.tar.gz + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.20.0 diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c74147..39d299e1794 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -64,7 +64,7 @@ then ninja $NINJA_FLAGS clickhouse-keeper ls -la ./programs/ - ldd ./programs/clickhouse-keeper + ldd ./programs/clickhouse-keeper ||: if [ -n "$MAKE_DEB" ]; then # No quotes because I want it to expand to nothing if empty. @@ -80,19 +80,9 @@ else cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - mkdir -p /workdir/cov-analysis - - wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1 - export PATH=$PATH:/workdir/cov-analysis/bin - cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" - SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" -fi - # No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. 
-$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET +ninja $NINJA_FLAGS $BUILD_TARGET ls -la ./programs @@ -107,9 +97,11 @@ if [ -n "$MAKE_DEB" ]; then bash -x /build/packages/build fi -mv ./programs/clickhouse* /output -[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output -mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds +if [ "$BUILD_TARGET" != "fuzzers" ]; then + mv ./programs/clickhouse* /output + [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output + mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds +fi prepare_combined_output () { local OUTPUT @@ -175,13 +167,6 @@ then mv "$COMBINED_OUTPUT.tar.zst" /output fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - # Coverity does not understand ZSTD. - tar -cvz -f "coverity-scan.tar.gz" cov-int - mv "coverity-scan.tar.gz" /output -fi - ccache_status ccache --evict-older-than 1d diff --git a/docker/packager/binary/rust b/docker/packager/binary/rust deleted file mode 120000 index 742dc49e9ac..00000000000 --- a/docker/packager/binary/rust +++ /dev/null @@ -1 +0,0 @@ -../../../rust \ No newline at end of file diff --git a/docker/packager/packager b/docker/packager/packager index e12bd55dde3..c1bb839193f 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -22,7 +22,7 @@ def check_image_exists_locally(image_name: str) -> bool: output = subprocess.check_output( f"docker images -q {image_name} 2> /dev/null", shell=True ) - return output != "" + return output != b"" except subprocess.CalledProcessError: return False @@ -46,7 +46,7 @@ def build_image(image_name: str, filepath: Path) -> None: ) -def pre_build(repo_path: Path, env_variables: List[str]): +def pre_build(repo_path: Path, env_variables: List[str]) -> None: if "WITH_PERFORMANCE=1" in env_variables: current_branch = subprocess.check_output( "git branch --show-current", shell=True, encoding="utf-8" @@ -80,9 +80,12 @@ def run_docker_image_with_env( output_dir: Path, env_variables: List[str], ch_root: Path, + cargo_cache_dir: Path, ccache_dir: Optional[Path], -): +) -> None: output_dir.mkdir(parents=True, exist_ok=True) + cargo_cache_dir.mkdir(parents=True, exist_ok=True) + env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -104,7 +107,7 @@ def run_docker_image_with_env( cmd = ( f"docker run --network=host --user={user} --rm {ccache_mount}" f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} " - f"{interactive} {image_name}" + f"--volume={cargo_cache_dir}:/rust/cargo/registry {interactive} {image_name}" ) logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) @@ -112,12 +115,12 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: - return build_type == "" and package_type == "deb" and sanitizer == "" +def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool: + return not debug_build and package_type == "deb" and sanitizer == "" def parse_env_variables( - build_type: str, + debug_build: bool, compiler: str, sanitizer: str, package_type: str, @@ -129,9 +132,10 @@ def parse_env_variables( version: str, official: bool, additional_pkgs: bool, + with_profiler: bool, with_coverage: bool, with_binaries: str, -): +) -> List[str]: DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" @@ -139,12 +143,16 @@ def parse_env_variables( 
FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" RISCV_SUFFIX = "-riscv64" + S390X_SUFFIX = "-s390x" AMD64_COMPAT_SUFFIX = "-amd64-compat" result = [] result.append("OUTPUT_DIR=/output") cmake_flags = ["$CMAKE_FLAGS"] - build_target = "clickhouse-bundle" + if package_type == "fuzzers": + build_target = "fuzzers" + else: + build_target = "clickhouse-bundle" is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) @@ -152,6 +160,7 @@ def parse_env_variables( is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_riscv = compiler.endswith(RISCV_SUFFIX) + is_cross_s390x = compiler.endswith(S390X_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) @@ -213,6 +222,11 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" ) + elif is_cross_s390x: + cc = compiler[: -len(S390X_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake" + ) elif is_amd64_compat: cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") @@ -240,28 +254,34 @@ def parse_env_variables( build_target = ( f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge" ) - if is_release_build(build_type, package_type, sanitizer): + if is_release_build(debug_build, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") else: result.append("BUILD_MUSL_KEEPER=1") + elif package_type == "fuzzers": + cmake_flags.append("-DENABLE_FUZZING=1") + cmake_flags.append("-DENABLE_PROTOBUF=1") + cmake_flags.append("-DUSE_INTERNAL_PROTOBUF_LIBRARY=1") + cmake_flags.append("-DWITH_COVERAGE=1") + cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON") + # cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr") + # cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc") + # cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") + # Reduce linking and building time by avoid *install/all dependencies + cmake_flags.append("-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON") result.append(f"CC={cc}") result.append(f"CXX={cxx}") cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") - # Create combined output archive for performance tests. 
- if package_type == "coverity": - result.append("COMBINED_OUTPUT=coverity") - result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') - if sanitizer: result.append(f"SANITIZER={sanitizer}") - if build_type: - result.append(f"BUILD_TYPE={build_type.capitalize()}") + if debug_build: + result.append("BUILD_TYPE=Debug") else: result.append("BUILD_TYPE=None") @@ -327,6 +347,9 @@ def parse_env_variables( # utils are not included into clickhouse-bundle, so build everything build_target = "all" + if with_profiler: + cmake_flags.append("-DENABLE_BUILD_PROFILING=1") + if with_coverage: cmake_flags.append("-DWITH_COVERAGE=1") @@ -356,7 +379,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--package-type", - choices=["deb", "binary", "coverity"], + choices=["deb", "binary", "fuzzers"], required=True, ) parser.add_argument( @@ -366,7 +389,7 @@ def parse_args() -> argparse.Namespace: help="ClickHouse git repository", ) parser.add_argument("--output-dir", type=dir_name, required=True) - parser.add_argument("--build-type", choices=("debug", ""), default="") + parser.add_argument("--debug-build", action="store_true") parser.add_argument( "--compiler", @@ -378,6 +401,7 @@ def parse_args() -> argparse.Namespace: "clang-16-aarch64-v80compat", "clang-16-ppc64le", "clang-16-riscv64", + "clang-16-s390x", "clang-16-amd64-compat", "clang-16-freebsd", ), @@ -417,10 +441,18 @@ def parse_args() -> argparse.Namespace: action="store_true", help="if set, the build fails on errors writing cache to S3", ) + parser.add_argument( + "--cargo-cache-dir", + default=Path(os.getenv("CARGO_HOME", "") or Path.home() / ".cargo") + / "registry", + type=dir_name, + help="a directory to preserve the rust cargo crates", + ) parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") parser.add_argument("--official", action="store_true") parser.add_argument("--additional-pkgs", action="store_true") + parser.add_argument("--with-profiler", action="store_true") parser.add_argument("--with-coverage", action="store_true") parser.add_argument( "--with-binaries", choices=("programs", "tests", ""), default="" @@ -456,7 +488,7 @@ def parse_args() -> argparse.Namespace: return args -def main(): +def main() -> None: logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") args = parse_args() @@ -472,7 +504,7 @@ def main(): build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, + args.debug_build, args.compiler, args.sanitizer, args.package_type, @@ -484,6 +516,7 @@ def main(): args.version, args.official, args.additional_pkgs, + args.with_profiler, args.with_coverage, args.with_binaries, ) @@ -495,6 +528,7 @@ def main(): args.output_dir, env_prepared, ch_root, + args.cargo_cache_dir, args.ccache_dir, ) logging.info("Output placed into %s", args.output_dir) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7f453627601..36dacd781bc 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.2.18" +ARG VERSION="23.8.2.7" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
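Note: the `docker/packager/packager` hunks above replace the old `--build-type=debug` string option with a boolean `--debug-build` flag and teach the script to recognize a `-s390x` compiler suffix that selects the s390x CMake toolchain file. A rough Python sketch of that mapping, mirroring the logic added above; the helper name is hypothetical, and the ppc64le toolchain path is assumed to follow the same pattern as the s390x and riscv64 ones shown in the diff:

```python
# Illustrative sketch only (not part of the patch); mirrors parse_env_variables().
from typing import List

CROSS_TOOLCHAINS = {
    "-s390x": "/build/cmake/linux/toolchain-s390x.cmake",      # added by this diff
    "-riscv64": "/build/cmake/linux/toolchain-riscv64.cmake",  # shown in the diff
    "-ppc64le": "/build/cmake/linux/toolchain-ppc64le.cmake",  # assumed path
}


def env_and_cmake_flags(compiler: str, debug_build: bool) -> List[str]:
    result: List[str] = []
    cmake_flags: List[str] = []
    cc = compiler
    for suffix, toolchain in CROSS_TOOLCHAINS.items():
        if compiler.endswith(suffix):
            cc = compiler[: -len(suffix)]  # "clang-16-s390x" -> "clang-16"
            cmake_flags.append(f"-DCMAKE_TOOLCHAIN_FILE={toolchain}")
            break
    result.append(f"CC={cc}")
    # --debug-build is now a boolean flag instead of --build-type=debug:
    result.append("BUILD_TYPE=Debug" if debug_build else "BUILD_TYPE=None")
    return result + cmake_flags


print(env_and_cmake_flags("clang-16-s390x", debug_build=False))
```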
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 1fa7b83ae16..752adf67229 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.2.18" +ARG VERSION="23.8.2.7" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index f6836804454..b55baa0e0fc 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update \ pv \ ripgrep \ zstd \ + locales \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) @@ -18,17 +19,23 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. -RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=Europe/Moscow +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +ENV TZ=Europe/Amsterdam RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone +# This script is used to setup realtime export of server logs from the CI into external ClickHouse cluster: +COPY setup_export_logs.sh / + CMD sleep 1 diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh new file mode 100755 index 00000000000..0d6c0e368db --- /dev/null +++ b/docker/test/base/setup_export_logs.sh @@ -0,0 +1,170 @@ +#!/bin/bash + +# This script sets up export of system log tables to a remote server. +# Remote tables are created if not exist, and augmented with extra columns, +# and their names will contain a hash of the table structure, +# which allows exporting tables from servers of different versions. 
+ +# Config file contains KEY=VALUE pairs with any necessary parameters like: +# CLICKHOUSE_CI_LOGS_HOST - remote host +# CLICKHOUSE_CI_LOGS_USER - password for user +# CLICKHOUSE_CI_LOGS_PASSWORD - password for user +CLICKHOUSE_CI_LOGS_CREDENTIALS=${CLICKHOUSE_CI_LOGS_CREDENTIALS:-/tmp/export-logs-config.sh} +CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci} + +# Pre-configured destination cluster, where to export the data +CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export} + +EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, "} +EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"} +EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "} + +function __set_connection_args +{ + # It's impossible to use generous $CONNECTION_ARGS string, it's unsafe from word splitting perspective. + # That's why we must stick to the generated option + CONNECTION_ARGS=( + --receive_timeout=45 --send_timeout=45 --secure + --user "${CLICKHOUSE_CI_LOGS_USER}" --host "${CLICKHOUSE_CI_LOGS_HOST}" + --password "${CLICKHOUSE_CI_LOGS_PASSWORD}" + ) +} + +function __shadow_credentials +{ + # The function completely screws the output, it shouldn't be used in normal functions, only in () + # The only way to substitute the env as a plain text is using perl 's/\Qsomething\E/another/ + exec &> >(perl -pe ' + s(\Q$ENV{CLICKHOUSE_CI_LOGS_HOST}\E)[CLICKHOUSE_CI_LOGS_HOST]g; + s(\Q$ENV{CLICKHOUSE_CI_LOGS_USER}\E)[CLICKHOUSE_CI_LOGS_USER]g; + s(\Q$ENV{CLICKHOUSE_CI_LOGS_PASSWORD}\E)[CLICKHOUSE_CI_LOGS_PASSWORD]g; + ') +} + +function check_logs_credentials +( + # The function connects with given credentials, and if it's unable to execute the simplest query, returns exit code + + # First check, if all necessary parameters are set + set +x + for parameter in CLICKHOUSE_CI_LOGS_HOST CLICKHOUSE_CI_LOGS_USER CLICKHOUSE_CI_LOGS_PASSWORD; do + export -p | grep -q "$parameter" || { + echo "Credentials parameter $parameter is unset" + return 1 + } + done + + __shadow_credentials + __set_connection_args + local code + # Catch both success and error to not fail on `set -e` + clickhouse-client "${CONNECTION_ARGS[@]}" -q 'SELECT 1 FORMAT Null' && return 0 || code=$? + if [ "$code" != 0 ]; then + echo 'Failed to connect to CI Logs cluster' + return $code + fi +) + +function config_logs_export_cluster +( + # The function is launched in a separate shell instance to not expose the + # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS + set +x + if ! 
[ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then + echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup" + return + fi + set -a + # shellcheck disable=SC1090 + source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" + set +a + __shadow_credentials + echo "Checking if the credentials work" + check_logs_credentials || return 0 + cluster_config="${1:-/etc/clickhouse-server/config.d/system_logs_export.yaml}" + mkdir -p "$(dirname "$cluster_config")" + echo "remote_servers: + ${CLICKHOUSE_CI_LOGS_CLUSTER}: + shard: + replica: + secure: 1 + user: '${CLICKHOUSE_CI_LOGS_USER}' + host: '${CLICKHOUSE_CI_LOGS_HOST}' + port: 9440 + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' +" > "$cluster_config" + echo "Cluster ${CLICKHOUSE_CI_LOGS_CLUSTER} is confugured in ${cluster_config}" +) + +function setup_logs_replication +( + # The function is launched in a separate shell instance to not expose the + # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS + set +x + # disable output + if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then + echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup" + return 0 + fi + set -a + # shellcheck disable=SC1090 + source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" + set +a + __shadow_credentials + echo "Checking if the credentials work" + check_logs_credentials || return 0 + __set_connection_args + + echo 'Create all configured system logs' + clickhouse-client --query "SYSTEM FLUSH LOGS" + + # It's doesn't make sense to try creating tables if SYNC fails + echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client "${CONNECTION_ARGS[@]}" || return 0 + + # For each system log table: + echo 'Create %_log tables' + clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table + do + # Calculate hash of its structure. 
Note: 1 is the version of extra columns - increment it if extra columns are changed: + hash=$(clickhouse-client --query " + SELECT sipHash64(1, groupArray((name, type))) + FROM (SELECT name, type FROM system.columns + WHERE database = 'system' AND table = '$table' + ORDER BY position) + ") + + # Create the destination table with adapted name and structure: + statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' + s/^\($/('"$EXTRA_COLUMNS"'/; + s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; + s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; + /^TTL /d + ') + + echo -e "Creating remote destination table ${table}_${hash} with statement:\n${statement}" >&2 + + echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \ + --distributed_ddl_task_timeout=30 \ + "${CONNECTION_ARGS[@]}" || continue + + echo "Creating table system.${table}_sender" >&2 + + # Create Distributed table and materialized view to watch on the original table: + clickhouse-client --query " + CREATE TABLE system.${table}_sender + ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash}) + SETTINGS flush_on_detach=0 + EMPTY AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " || continue + + echo "Creating materialized view system.${table}_watcher" >&2 + + clickhouse-client --query " + CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " || continue + done +) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index da4baa8c687..fd7a5640964 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -18,6 +18,7 @@ RUN apt-get update \ python3-termcolor \ unixodbc \ pv \ + jq \ zstd \ --yes --no-install-recommends @@ -32,7 +33,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index e25b5fdbfed..296a132d3e3 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -120,7 +120,7 @@ function clone_submodules contrib/libxml2 contrib/libunwind contrib/fmtlib - contrib/base64 + contrib/aklomp-base64 contrib/cctz contrib/libcpuid contrib/libdivide @@ -147,6 +147,8 @@ function clone_submodules contrib/simdjson contrib/liburing contrib/libfiu + contrib/incbin + contrib/yaml-cpp ) git submodule sync @@ -169,6 +171,7 @@ function run_cmake "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" "-DENABLE_LIBURING=1" + "-DENABLE_YAML_CPP=1" ) export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index aa71074c02a..0bc0fb06633 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 5cda0831a84..5426cd9756b 100755 --- 
a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -1,6 +1,8 @@ #!/bin/bash # shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031,SC2010,SC2015 +# shellcheck disable=SC1091 +source /setup_export_logs.sh set -x # core.COMM.PID-TID @@ -122,6 +124,8 @@ EOL $PWD EOL + + config_logs_export_cluster db/config.d/system_logs_export.yaml } function filter_exists_and_template @@ -223,7 +227,9 @@ quit done clickhouse-client --query "select 1" # This checks that the server is responding kill -0 $server_pid # This checks that it is our server that is started and not some other one - echo Server started and responded + echo 'Server started and responded' + + setup_logs_replication # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric. # SC2046: Quote this to prevent word splitting. Actually I need word splitting. diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile index 9614473c69b..e9c928b1fe7 100644 --- a/docker/test/install/deb/Dockerfile +++ b/docker/test/install/deb/Dockerfile @@ -12,6 +12,7 @@ ENV \ # install systemd packages RUN apt-get update && \ apt-get install -y --no-install-recommends \ + sudo \ systemd \ && \ apt-get clean && \ diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index b06a0dcc830..e37e2800557 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get install -y wget openjdk-8-jre RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \ tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz -RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ +RUN wget https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz RUN apt install -y vim diff --git a/docker/test/integration/nginx_dav/Dockerfile b/docker/test/integration/nginx_dav/Dockerfile new file mode 100644 index 00000000000..42c1244f6dc --- /dev/null +++ b/docker/test/integration/nginx_dav/Dockerfile @@ -0,0 +1,6 @@ +FROM nginx:alpine-slim + +COPY default.conf /etc/nginx/conf.d/ + +RUN mkdir /usr/share/nginx/files/ \ + && chown nginx: /usr/share/nginx/files/ -R diff --git a/docker/test/integration/nginx_dav/default.conf b/docker/test/integration/nginx_dav/default.conf new file mode 100644 index 00000000000..466d0584a2d --- /dev/null +++ b/docker/test/integration/nginx_dav/default.conf @@ -0,0 +1,25 @@ +server { + listen 80; + + #root /usr/share/nginx/test.com; + index index.html index.htm; + + server_name test.com localhost; + + location / { + expires max; + root /usr/share/nginx/files; + client_max_body_size 20m; + client_body_temp_path /usr/share/nginx/tmp; + dav_methods PUT; # Allowed methods, only PUT is necessary + + create_full_put_path on; # nginx automatically creates nested directories + dav_access user:rw group:r all:r; # access permissions for files + + limit_except GET { + allow all; + } + } + + error_page 405 =200 $uri; +} diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 8e95d94b6dc..d42fcb9baf6 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -95,14 +95,18 @@ RUN python3 -m pip install --no-cache-dir \ pytest-timeout \ pytest-xdist \ pytz \ + pyyaml==5.3.1 \ redis \ requests-kerberos \
tzlocal==2.1 \ retry \ + bs4 \ + lxml \ urllib3 +# bs4, lxml are for cloud tests, do not delete # Hudi supports only spark 3.3.*, not 3.4 -RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ +RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ && tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \ && rm spark-3.3.2-bin-hadoop3.tgz @@ -129,7 +133,7 @@ COPY misc/ /misc/ # Same options as in test/base/Dockerfile # (in case you need to override them in tests) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml index b329d4e0a46..e4736e04846 100644 --- a/docker/test/integration/runner/compose/docker_compose_coredns.yml +++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml @@ -2,7 +2,7 @@ version: "2.3" services: coredns: - image: coredns/coredns:latest + image: coredns/coredns:1.9.3 # :latest broke this test restart: always volumes: - ${COREDNS_CONFIG_DIR}/example.com:/example.com diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml index f83eb93fea7..1cae54ad9e1 100644 --- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml @@ -12,3 +12,5 @@ services: - type: ${HDFS_FS:-tmpfs} source: ${HDFS_LOGS:-} target: /usr/local/hadoop/logs + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 30d1b0bed3f..4ae3de3cbc7 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -31,6 +31,8 @@ services: - kafka_zookeeper security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' schema-registry: image: confluentinc/cp-schema-registry:5.2.0 diff --git a/docker/test/integration/runner/compose/docker_compose_keeper.yml b/docker/test/integration/runner/compose/docker_compose_keeper.yml index 8524823ed87..91010c4aa83 100644 --- a/docker/test/integration/runner/compose/docker_compose_keeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_keeper.yml @@ -20,6 +20,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir1:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config1.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE @@ -53,6 +56,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir2:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config2.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log 
--errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE @@ -86,6 +92,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir3:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config3.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index 1160192696d..e955a14eb3d 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -20,6 +20,8 @@ services: depends_on: - hdfskerberos entrypoint: /etc/bootstrap.sh -d + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' hdfskerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} @@ -29,3 +31,5 @@ services: - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh - /dev/urandom:/dev/random expose: [88, 749] + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 86e920ff573..49d4c1db90f 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -48,6 +48,8 @@ services: - kafka_kerberos security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' kafka_kerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} diff --git a/docker/test/integration/runner/compose/docker_compose_meili.yml b/docker/test/integration/runner/compose/docker_compose_meili.yml index c734c43b4c6..c1fad4aca87 100644 --- a/docker/test/integration/runner/compose/docker_compose_meili.yml +++ b/docker/test/integration/runner/compose/docker_compose_meili.yml @@ -1,16 +1,15 @@ version: '2.3' services: meili1: - image: getmeili/meilisearch:v0.27.0 + image: getmeili/meilisearch:v0.27.0 restart: always ports: - ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700} meili_secure: - image: getmeili/meilisearch:v0.27.0 + image: getmeili/meilisearch:v0.27.0 restart: always ports: - ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700} environment: MEILI_MASTER_KEY: "password" - diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 3eaf891ff8e..f2979566296 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -14,7 +14,7 @@ services: MINIO_ACCESS_KEY: minio MINIO_SECRET_KEY: minio123 MINIO_PROMETHEUS_AUTH_TYPE: public - command: server --address :9001 --certs-dir /certs /data1-1 + command: server --console-address 127.0.0.1:19001 --address :9001 --certs-dir /certs /data1-1 depends_on: - proxy1 - proxy2 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 6b98a372bd0..103fe2769e9 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml 
+++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-1.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-1.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -21,4 +21,4 @@ services: volumes: - type: ${MYSQL_LOGS_FS:-tmpfs} source: ${MYSQL_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml index d5fb5a53aaf..9c9c7430cec 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml @@ -9,9 +9,9 @@ services: DATADIR: /mysql/ expose: - ${MYSQL8_PORT:-3306} - command: --server_id=100 --log-bin='mysql-bin-1.log' - --default_authentication_plugin='mysql_native_password' - --default-time-zone='+3:00' --gtid-mode="ON" + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default_authentication_plugin='mysql_native_password' + --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -20,4 +20,4 @@ services: volumes: - type: ${MYSQL8_LOGS_FS:-tmpfs} source: ${MYSQL8_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml index 8e145a3b408..73f9e39f0d6 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-2.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-2.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/2_error.log @@ -31,10 +31,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-3.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-3.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/3_error.log @@ -53,10 +53,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-4.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-4.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/4_error.log @@ -65,4 +65,4 @@ services: volumes: - type: ${MYSQL_CLUSTER_LOGS_FS:-tmpfs} source: ${MYSQL_CLUSTER_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_nginx.yml b/docker/test/integration/runner/compose/docker_compose_nginx.yml index d0fb9fc1ff4..38d2a6d84c8 100644 --- a/docker/test/integration/runner/compose/docker_compose_nginx.yml +++ b/docker/test/integration/runner/compose/docker_compose_nginx.yml @@ -5,7 
+5,7 @@ services: # Files will be put into /usr/share/nginx/files. nginx: - image: kssenii/nginx-test:1.1 + image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest} restart: always ports: - 80:80 diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index 1fb6b7a1410..2ef7eb17395 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -12,9 +12,9 @@ services: timeout: 5s retries: 5 networks: - default: - aliases: - - postgre-sql.local + default: + aliases: + - postgre-sql.local environment: POSTGRES_HOST_AUTH_METHOD: "trust" POSTGRES_PASSWORD: mysecretpassword diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml index 7a1c32e0023..b5dbae423b2 100644 --- a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml +++ b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml @@ -12,7 +12,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -37,7 +37,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -61,7 +61,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 3c4ff522b36..b05aef76faf 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -64,15 +64,16 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest} -export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} -export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest} +export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} +export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} +export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} +export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest} export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest} +export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest} export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest} -export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} -export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} cd /ClickHouse/tests/integration exec "$@" diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index ab9f1f8a2e3..d31663f9071 100644 --- 
a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,18 +1,7 @@ # docker build -t clickhouse/performance-comparison . -# Using ubuntu:22.04 over 20.04 as all other images, since: -# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend -# b) anyway for perf tests it should not be important (backward compatiblity -# with older ubuntu had been checked lots of times in various tests) -FROM ubuntu:22.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ @@ -56,10 +45,9 @@ COPY * / # node #0 should be less stable because of system interruptions. We bind # randomly to node 1 or 0 to gather some statistics on that. We have to bind # both servers and the tmpfs on which the database is stored. How to do it -# through Yandex Sandbox API is unclear, but by default tmpfs uses +# is unclear, but by default tmpfs uses # 'process allocation policy', not sure which process but hopefully the one that -# writes to it, so just bind the downloader script as well. We could also try to -# remount it with proper options in Sandbox task. +# writes to it, so just bind the downloader script as well. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 798d2a40b12..0407d54ff6e 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -90,7 +90,7 @@ function configure set +m wait_for_server $LEFT_SERVER_PORT $left_pid - echo Server for setup started + echo "Server for setup started" clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: @@ -156,9 +156,9 @@ function restart wait_for_server $RIGHT_SERVER_PORT $right_pid echo right ok - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options" - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options" # Check again that both servers we started are running -- this is important @@ -352,14 +352,12 @@ function get_profiles wait clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: & - clickhouse-client --port $LEFT_SERVER_PORT --query "select * 
from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & @@ -646,7 +644,7 @@ function report rm -r report ||: mkdir report report/tmp ||: -rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||: +rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv all-queries.tsv run-errors.tsv ||: cat analyze/errors.log >> report/errors.log ||: cat profile-errors.log >> report/errors.log ||: @@ -665,9 +663,8 @@ create view partial_query_times as select * from -- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g. -- queries with new functions added in the tested PR). create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select toDecimal64(time_median, 3) time, - toDecimal64(time_stddev / time_median, 3) relative_time_stddev, + as select round(time_median, 3) time, + round(time_stddev / time_median, 3) relative_time_stddev, test, query_index, query_display_name from partial_query_times join query_display_names using (test, query_index) @@ -739,28 +736,26 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') ; create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? 
left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(diff, 3), toDecimal64(stat_threshold, 3), + round(left, 3), round(right, 3), times_change_str, + round(diff, 3), round(stat_threshold, 3), changed_fail, test, query_index, query_display_name from queries where changed_show order by abs(diff) desc; create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as select - toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3), - toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name + round(left, 3), round(right, 3), round(diff, 3), + round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name from queries where unstable_show order by stat_threshold desc; @@ -789,11 +784,10 @@ create view total_speedup as ; create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as with (times_speedup >= 1 - ? '-' || toString(toDecimal64(times_speedup, 3)) || 'x' - : '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x') + ? '-' || toString(round(times_speedup, 3)) || 'x' + : '+' || toString(round(1 / times_speedup, 3)) || 'x') as times_speedup_str select test, times_speedup_str, queries, bad, changed, unstable -- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all @@ -816,13 +810,6 @@ create view total_client_time_per_query as select * from file('analyze/client-times.tsv', TSV, 'test text, query_index int, client float, server float'); -create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select client, server, toDecimal64(client/server, 3) p, - test, query_display_name - from total_client_time_per_query left join query_display_names using (test, query_index) - where p > toDecimal64(1.02, 3) order by p desc; - create table wall_clock_time_per_test engine Memory as select * from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float'); @@ -899,15 +886,14 @@ create view test_times_view_total as ; create table test_times_report engine File(TSV, 'report/test-times.tsv') - settings output_format_decimal_trailing_zeros = 1 as select test, - toDecimal64(real, 3), - toDecimal64(total_client_time, 3), + round(real, 3), + round(total_client_time, 3), queries, - toDecimal64(query_max, 3), - toDecimal64(avg_real_per_query, 3), - toDecimal64(query_min, 3), + round(query_max, 3), + round(avg_real_per_query, 3), + round(query_min, 3), runs from ( select * from test_times_view @@ -919,21 +905,20 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') -- report for all queries page, only main metric create table all_tests_report engine File(TSV, 'report/all-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? 
left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select changed_fail, unstable_fail, - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(isFinite(diff) ? diff : 0, 3), - toDecimal64(isFinite(stat_threshold) ? stat_threshold : 0, 3), + round(left, 3), round(right, 3), times_change_str, + round(isFinite(diff) ? diff : 0, 3), + round(isFinite(stat_threshold) ? stat_threshold : 0, 3), test, query_index, query_display_name from queries order by test, query_index; @@ -1044,27 +1029,6 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes, order by count() desc ; -create table metric_devation engine File(TSVWithNamesAndTypes, - 'report/metric-deviation.$version.tsv') - settings output_format_decimal_trailing_zeros = 1 - -- first goes the key used to split the file with grep - as select test, query_index, query_display_name, - toDecimal64(d, 3) d, q, metric - from ( - select - test, query_index, - (q[3] - q[1])/q[2] d, - quantilesExact(0, 0.5, 1)(value) q, metric - from (select * from unstable_run_metrics - union all select * from unstable_run_traces - union all select * from unstable_run_metrics_2) mm - group by test, query_index, metric - having isFinite(d) and d > 0.5 and q[3] > 5 - ) metrics - left join query_display_names using (test, query_index) - order by test, query_index, d desc - ; - create table stacks engine File(TSV, 'report/stacks.$version.tsv') as select -- first goes the key used to split the file with grep @@ -1173,9 +1137,8 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as -- Show metrics that have changed create table changes engine File(TSV, 'metrics/changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as select metric, left, right, - toDecimal64(diff, 3), toDecimal64(times_diff, 3) + round(diff, 3), round(times_diff, 3) from ( select metric, median(left) as left, median(right) as right, (right - left) / left diff, @@ -1226,7 +1189,6 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') '$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha, '${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name, '$(sed -n 's/.*/\1/p' report.html)' :: LowCardinality(String) AS check_status, - -- TODO toDateTime() can't parse output of 'date', so no time for now. 
(($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms, fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time, test_name :: LowCardinality(String) AS test_name , diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index 39c29bb61ca..292665c4f68 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -19,31 +19,6 @@ - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - 1000000000 10 diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 093834943a3..cb591f1a184 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -3,7 +3,7 @@ 1 1 - 1 + 0 0 0 + 0 60 diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index aee11030068..cb243b655c6 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -31,8 +31,6 @@ function download # Test all of them. declare -a urls_to_try=( "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" ) for path in "${urls_to_try[@]}" diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 74571777be0..fb5e6bd2a7a 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -130,7 +130,7 @@ then git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt fi -# Set python output encoding so that we can print queries with Russian letters. +# Set python output encoding so that we can print queries with non-ASCII letters. export PYTHONIOENCODING=utf-8 # By default, use the main comparison script from the tested package, so that we @@ -151,11 +151,7 @@ export PATH export REF_PR export REF_SHA -# Try to collect some core dumps. I've seen two patterns in Sandbox: -# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c -# Not sure what this script does (puts them to sandbox resources, logs some messages?), -# and it's not accessible from inside docker anyway. -# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory. +# Try to collect some core dumps. # At least we remove the ulimit and then try to pack some common file names into output. 
ulimit -c unlimited cat /proc/sys/kernel/core_pattern diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 7a4e6386d0d..d23a9ac61c1 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -369,6 +369,7 @@ for query_index in queries_to_run: "max_execution_time": args.prewarm_max_query_seconds, "query_profiler_real_time_period_ns": 10000000, "query_profiler_cpu_time_period_ns": 10000000, + "metrics_perf_events_enabled": 1, "memory_profiler_step": "4Mi", }, ) @@ -503,6 +504,7 @@ for query_index in queries_to_run: settings={ "query_profiler_real_time_period_ns": 10000000, "query_profiler_cpu_time_period_ns": 10000000, + "metrics_perf_events_enabled": 1, }, ) print( diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index a1f2eb9d9ec..7da30ba7a08 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -364,20 +364,6 @@ if args.report == "main": ] ) - slow_on_client_rows = tsvRows("report/slow-on-client.tsv") - error_tests += len(slow_on_client_rows) - addSimpleTable( - "Slow on Client", - ["Client time, s", "Server time, s", "Ratio", "Test", "Query"], - slow_on_client_rows, - ) - if slow_on_client_rows: - errors_explained.append( - [ - f'Some queries are taking noticeable time client-side (missing `FORMAT Null`?)' - ] - ) - def add_backward_incompatible(): rows = tsvRows("report/partial-queries-report.tsv") if not rows: diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index 444252837a3..db828741b0d 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -1,4 +1,5 @@ #!/bin/bash + set -exu trap "exit" INT TERM @@ -96,5 +97,4 @@ rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compressed (FIXME: remove once only github actions will be left) -rm /var/log/clickhouse-server/clickhouse-server.log mv /var/log/clickhouse-server/stderr.log /test_output/ ||: diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile new file mode 100644 index 00000000000..437677f4fd1 --- /dev/null +++ b/docker/test/sqltest/Dockerfile @@ -0,0 +1,30 @@ +# docker build -t clickhouse/sqltest . 
+ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +RUN apt-get update --yes \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + wget \ + git \ + python3 \ + python3-dev \ + python3-pip \ + sudo \ + && apt-get clean + +RUN pip3 install \ + pyyaml \ + clickhouse-driver + +ARG sqltest_repo="https://github.com/elliotchance/sqltest/" + +RUN git clone ${sqltest_repo} + +ENV TZ=UTC +ENV MAX_RUN_TIME=900 +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY run.sh / +COPY test.py / +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/sqltest/run.sh b/docker/test/sqltest/run.sh new file mode 100755 index 00000000000..cba1c1dab1f --- /dev/null +++ b/docker/test/sqltest/run.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# shellcheck disable=SC2015 + +set -x +set -e +set -u +set -o pipefail + +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"} +BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} + +function wget_with_retry +{ + for _ in 1 2 3 4; do + if wget -nv -nd -c "$1";then + return 0 + else + sleep 0.5 + fi + done + return 1 +} + +wget_with_retry "$BINARY_URL_TO_DOWNLOAD" +chmod +x clickhouse +./clickhouse install --noninteractive + +echo " +users: + default: + access_management: 1" > /etc/clickhouse-server/users.d/access_management.yaml + +clickhouse start + +# Wait for start +for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break ||: + sleep 1 +done + +# Run the test +pushd sqltest/standards/2016/ +/test.py +mv report.html test.log /workspace +popd + +zstd --threads=0 /var/log/clickhouse-server/clickhouse-server.log +zstd --threads=0 /var/log/clickhouse-server/clickhouse-server.err.log + +mv /var/log/clickhouse-server/clickhouse-server.log.zst /var/log/clickhouse-server/clickhouse-server.err.log.zst /workspace diff --git a/docker/test/sqltest/test.py b/docker/test/sqltest/test.py new file mode 100755 index 00000000000..5807ca79b02 --- /dev/null +++ b/docker/test/sqltest/test.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 + +import os +import yaml +import html +import random +import string +from clickhouse_driver import Client + + +client = Client(host="localhost", port=9000) +settings = { + "default_table_engine": "Memory", + "union_default_mode": "DISTINCT", + "calculate_text_stack_trace": 0, +} + +database_name = "sqltest_" + "".join( + random.choice(string.ascii_lowercase) for _ in range(10) +) + +client.execute(f"DROP DATABASE IF EXISTS {database_name}", settings=settings) +client.execute(f"CREATE DATABASE {database_name}", settings=settings) + +client = Client(host="localhost", port=9000, database=database_name) + +summary = {"success": 0, "total": 0, "results": {}} + +log_file = open("test.log", "w") +report_html_file = open("report.html", "w") + +with open("features.yml", "r") as file: + yaml_content = yaml.safe_load(file) + + for category in yaml_content: + log_file.write(category.capitalize() + " features:\n") + summary["results"][category] = {"success": 0, "total": 0, "results": {}} + + for test in yaml_content[category]: + log_file.write(test + ": " + yaml_content[category][test] + "\n") + summary["results"][category]["results"][test] = { + "success": 0, + "total": 0, + "description": yaml_content[category][test], + } + + test_path = test[0] + "/" + test + ".tests.yml" + if os.path.exists(test_path): + with open(test_path, "r") as 
test_file: + test_yaml_content = yaml.load_all(test_file, Loader=yaml.FullLoader) + + for test_case in test_yaml_content: + queries = test_case["sql"] + if not isinstance(queries, list): + queries = [queries] + + for query in queries: + # Example: E011-01 + test_group = "" + if "-" in test: + test_group = test.split("-", 1)[0] + summary["results"][category]["results"][test_group][ + "total" + ] += 1 + summary["results"][category]["results"][test]["total"] += 1 + summary["results"][category]["total"] += 1 + summary["total"] += 1 + + log_file.write(query + "\n") + + try: + result = client.execute(query, settings=settings) + log_file.write(str(result) + "\n") + + if test_group: + summary["results"][category]["results"][test_group][ + "success" + ] += 1 + summary["results"][category]["results"][test][ + "success" + ] += 1 + summary["results"][category]["success"] += 1 + summary["success"] += 1 + + except Exception as e: + log_file.write(f"Error occurred: {str(e)}\n") + +client.execute(f"DROP DATABASE {database_name}", settings=settings) + + +def enable_color(ratio): + if ratio == 0: + return "" + elif ratio < 0.5: + return "" + elif ratio < 1: + return "" + else: + return "" + + +reset_color = "" + + +def print_ratio(indent, name, success, total, description): + report_html_file.write( + "{}{}: {}{} / {} ({:.1%}){}{}\n".format( + " " * indent, + name.capitalize(), + enable_color(success / total), + success, + total, + success / total, + reset_color, + f" - " + html.escape(description) if description else "", + ) + ) + + +report_html_file.write( + "
\n"
+)
+
+print_ratio(0, "Total", summary["success"], summary["total"], "")
+
+for category in summary["results"]:
+    cat_summary = summary["results"][category]
+
+    if cat_summary["total"] == 0:
+        continue
+
+    print_ratio(2, category, cat_summary["success"], cat_summary["total"], "")
+
+    for test in summary["results"][category]["results"]:
+        test_summary = summary["results"][category]["results"][test]
+
+        if test_summary["total"] == 0:
+            continue
+
+        print_ratio(
+            6 if "-" in test else 4,
+            test,
+            test_summary["success"],
+            test_summary["total"],
+            test_summary["description"],
+        )
+
+report_html_file.write("
\n") diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index c973b6c6ec6..ad3c3477b37 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +# shellcheck disable=SC1091 +source /setup_export_logs.sh set -e -x # Choose random timezone for this test run @@ -20,6 +22,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateful +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -65,6 +69,9 @@ function start() } start + +setup_logs_replication + # shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index e1e84c427ba..d96531e84c2 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -40,7 +40,10 @@ RUN apt-get update -y \ cargo \ zstd \ file \ + jq \ pv \ + zip \ + p7zip-full \ && apt-get clean RUN pip3 install numpy scipy pandas Jinja2 @@ -52,7 +55,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 @@ -71,7 +74,7 @@ RUN arch=${TARGETARCH:-amd64} \ && chmod +x ./mc ./minio -RUN wget --no-verbose 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz @@ -85,4 +88,5 @@ RUN npm install -g azurite \ COPY run.sh / COPY setup_minio.sh / COPY setup_hdfs_minicluster.sh / + CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index fe53925ecc8..d2324a6c696 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -1,9 +1,15 @@ #!/bin/bash +# shellcheck disable=SC1091 +source /setup_export_logs.sh + # fail on errors, verbose and export all env variables set -e -x -a # Choose random timezone for this test run. +# +# NOTE: that clickhouse-test will randomize session_timezone by itself as well +# (it will choose between default server timezone and something specific). TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone @@ -33,6 +39,8 @@ fi ./setup_minio.sh stateless ./setup_hdfs_minicluster.sh +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + # For flaky check we also enable thread fuzzer if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 @@ -89,7 +97,15 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi -sleep 5 + +# Wait for the server to start, but not for too long. 
+for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break + sleep 1 +done + +setup_logs_replication attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 @@ -199,6 +215,12 @@ rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & +data_path_config="--path=/var/lib/clickhouse/" +if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + # We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason) + data_path_config="--config-file=/etc/clickhouse-server/config.xml" +fi + # Compress tables. # # NOTE: @@ -208,7 +230,7 @@ zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_outp # for files >64MB, we want this files to be compressed explicitly for table in query_log zookeeper_log trace_log transactions_info_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: @@ -218,7 +240,7 @@ done # Also export trace log in flamegraph-friendly format. for trace_type in CPU Memory Real do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q " + clickhouse-local "$data_path_config" --only-system-tables -q " select arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, count(*) AS samples diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4926967d2d2..58bff56a6c5 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -5,6 +5,8 @@ # Avoid overlaps with previous runs dmesg --clear +# shellcheck disable=SC1091 +source /setup_export_logs.sh set -x @@ -14,6 +16,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/attach_gdb.lib source /usr/share/clickhouse-test/ci/stress_tests.lib install_packages package_folder @@ -50,9 +53,13 @@ configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + start -shellcheck disable=SC2086 # No quotes because I want to split it into words. +setup_logs_replication + +# shellcheck disable=SC2086 # No quotes because I want to split it into words. 
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" @@ -179,6 +186,11 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ + | sed "s|trace|test|" \ + > /etc/clickhouse-server/config.d/logger_trace.xml.tmp +mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml + start stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ @@ -232,4 +244,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 2aa0b1a62d6..a4feae27c67 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/style-test . -FROM ubuntu:20.04 +FROM ubuntu:22.04 ARG ACT_VERSION=0.2.33 ARG ACTIONLINT_VERSION=1.6.22 @@ -18,9 +18,13 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ + locales \ && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ - && rm -rf /root/.cache/pip + && rm -rf /root/.cache/pip + +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index b8061309342..0b30ab9dbf7 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/attach_gdb.lib source /usr/share/clickhouse-test/ci/stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & @@ -61,6 +62,7 @@ configure # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml +rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start @@ -90,6 +92,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml +rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start @@ -126,6 +129,7 @@ sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \ | sed "s|>1<|>0<|g" \ > /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml +rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml start 500 clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ @@ -228,4 +232,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docs/README.md b/docs/README.md index 0cd35a4e3ec..d1260312166 100644 --- a/docs/README.md +++ b/docs/README.md @@ -200,8 +200,8 @@ Templates: - [Server Setting](_description_templates/template-server-setting.md) - [Database or Table engine](_description_templates/template-engine.md) - [System table](_description_templates/template-system-table.md) -- [Data type](_description_templates/data-type.md) -- [Statement](_description_templates/statement.md) +- [Data type](_description_templates/template-data-type.md) +- [Statement](_description_templates/template-statement.md) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 5d4571aed9e..0ae77f464eb 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -36,6 +36,9 @@ then elif [ "${ARCH}" = "riscv64" ] then DIR="riscv64" + elif [ "${ARCH}" = "s390x" ] + then + DIR="s390x" fi elif [ "${OS}" = "FreeBSD" ] then diff --git a/docs/changelogs/v22.8.21.38-lts.md b/docs/changelogs/v22.8.21.38-lts.md new file mode 100644 index 00000000000..fc919b25735 --- /dev/null +++ b/docs/changelogs/v22.8.21.38-lts.md @@ -0,0 +1,36 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.21.38-lts (70872e9859e) FIXME as compared to v22.8.20.11-lts (c9ca79e24e8) + +#### Build/Testing/Packaging Improvement +* Backported in [#53017](https://github.com/ClickHouse/ClickHouse/issues/53017): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Backported in [#53459](https://github.com/ClickHouse/ClickHouse/issues/53459): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v23.3.10.5-lts.md b/docs/changelogs/v23.3.10.5-lts.md new file mode 100644 index 00000000000..61c47747709 --- /dev/null +++ b/docs/changelogs/v23.3.10.5-lts.md @@ -0,0 +1,14 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.10.5-lts (d8737007f9e) FIXME as compared to v23.3.9.55-lts (b9c5c8622d3) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v23.3.11.5-lts.md b/docs/changelogs/v23.3.11.5-lts.md new file mode 100644 index 00000000000..b671c7e5bb6 --- /dev/null +++ b/docs/changelogs/v23.3.11.5-lts.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.11.5-lts (5762a23a76d) FIXME as compared to v23.3.10.5-lts (d8737007f9e) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.3.12.11-lts.md b/docs/changelogs/v23.3.12.11-lts.md new file mode 100644 index 00000000000..2eaaa575f60 --- /dev/null +++ b/docs/changelogs/v23.3.12.11-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.12.11-lts (414317bed21) FIXME as compared to v23.3.11.5-lts (5762a23a76d) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix crash in join on sparse column [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). +* Fix rows_before_limit_at_least for DelayedSource. [#54122](https://github.com/ClickHouse/ClickHouse/pull/54122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v23.3.13.6-lts.md b/docs/changelogs/v23.3.13.6-lts.md new file mode 100644 index 00000000000..bcc08721806 --- /dev/null +++ b/docs/changelogs/v23.3.13.6-lts.md @@ -0,0 +1,13 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.13.6-lts (25635e27551) FIXME as compared to v23.3.12.11-lts (414317bed21) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backport moving/ part checking code from master [#54157](https://github.com/ClickHouse/ClickHouse/pull/54157) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.3.9.55-lts.md b/docs/changelogs/v23.3.9.55-lts.md new file mode 100644 index 00000000000..a08070892b5 --- /dev/null +++ b/docs/changelogs/v23.3.9.55-lts.md @@ -0,0 +1,45 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.9.55-lts (b9c5c8622d3) FIXME as compared to v23.3.8.21-lts (1675f2264f3) + +#### Performance Improvement +* Backported in [#52213](https://github.com/ClickHouse/ClickHouse/issues/52213): Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)). +* Backported in [#52826](https://github.com/ClickHouse/ClickHouse/issues/52826): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#53019](https://github.com/ClickHouse/ClickHouse/issues/53019): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53288](https://github.com/ClickHouse/ClickHouse/issues/53288): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53461](https://github.com/ClickHouse/ClickHouse/issues/53461): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.5.5.92-stable.md b/docs/changelogs/v23.5.5.92-stable.md new file mode 100644 index 00000000000..ade39b7545d --- /dev/null +++ b/docs/changelogs/v23.5.5.92-stable.md @@ -0,0 +1,62 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.5.5.92-stable (557edaddace) FIXME as compared to v23.5.4.25-stable (190f962abcf) + +#### Performance Improvement +* Backported in [#52749](https://github.com/ClickHouse/ClickHouse/issues/52749): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . 
[#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#51886](https://github.com/ClickHouse/ClickHouse/issues/51886): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#52909](https://github.com/ClickHouse/ClickHouse/issues/52909): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53021](https://github.com/ClickHouse/ClickHouse/issues/53021): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53289](https://github.com/ClickHouse/ClickHouse/issues/53289): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53463](https://github.com/ClickHouse/ClickHouse/issues/53463): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.6.3.87-stable.md b/docs/changelogs/v23.6.3.87-stable.md new file mode 100644 index 00000000000..8db499f308a --- /dev/null +++ b/docs/changelogs/v23.6.3.87-stable.md @@ -0,0 +1,58 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.3.87-stable (36911c17d0f) FIXME as compared to v23.6.2.18-stable (89f39a7ccfe) + +#### Performance Improvement +* Backported in [#52751](https://github.com/ClickHouse/ClickHouse/issues/52751): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52911](https://github.com/ClickHouse/ClickHouse/issues/52911): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53023](https://github.com/ClickHouse/ClickHouse/issues/53023): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53290](https://github.com/ClickHouse/ClickHouse/issues/53290): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53465](https://github.com/ClickHouse/ClickHouse/issues/53465): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). 
+* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). 
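The same handful of fixes recurs across the backport lists above (for example #52409 and #52161). The queries below are only hedged repro shapes of the behaviours those entries describe, with illustrative values; on patched releases they are expected to complete normally.

```sql
-- countSubstrings() with an empty needle and a column haystack used to hang (PR #52409).
SELECT countSubstrings(s, '') FROM (SELECT 'abc' AS s);

-- groupArrayMoving* aggregate family affected by PR #52161; the window size 2 is illustrative.
SELECT groupArrayMovingSum(2)(number) FROM numbers(5);
```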
+* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.7.1.2470-stable.md b/docs/changelogs/v23.7.1.2470-stable.md new file mode 100644 index 00000000000..a77078cb653 --- /dev/null +++ b/docs/changelogs/v23.7.1.2470-stable.md @@ -0,0 +1,452 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.1.2470-stable (a70127baecc) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Backward Incompatible Change +* Add ` NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). 
This will not allow to process existing entries for distributed DDL after **downgrade** (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `toDecimalString` is removed due to subpar implementation quality. This closes [#52407](https://github.com/ClickHouse/ClickHouse/issues/52407). [#52450](https://github.com/ClickHouse/ClickHouse/pull/52450) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Implement KQL-style formatting for Interval. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). +* Support ZooKeeper `reconfig` command for CH Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). +* Kafka connector can fetch avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). +* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added support for prql as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add a column is_obsolete to system.settings and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). +* Implement support of encrypted elements in configuration file Added possibility to use encrypted text in leaf elements of configuration file. The text is encrypted using encryption codecs from section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). +* Just a new request of [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). +* Add SYSTEM STOP LISTEN query. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). 
[#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add input_format_csv_allow_variable_number_of_columns options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Another boring feature: add function substring_index, as in spark or mysql. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). +* Show stats for jemalloc bins. Example ``` SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). +* Add RowBinaryWithDefaults format with extra byte before each column for using column default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new initcap / initcapUTF8 functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added Peak Memory Usage (for query) to client final statistics, and to http header. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new hasSubsequence() (+CaseInsensitive + UTF8 versions) functions. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). 
[#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Re-add SipHash keyed functions. [#52206](https://github.com/ClickHouse/ClickHouse/pull/52206) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). + +#### Performance Improvement +* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). +* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). +* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). +* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Remove duplicate condition in functionunixtimestamp64.h. [#51857](https://github.com/ClickHouse/ClickHouse/pull/51857) ([lcjh](https://github.com/ljhcage)). +* The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* 1. Add rewriter for both old and new analyzer. 2. Add settings `optimize_uniq_to_count` which default is 0. [#52004](https://github.com/ClickHouse/ClickHouse/pull/52004) ([JackyWoo](https://github.com/JackyWoo)). +* The performance experiments of **OnTime** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **11.6%** to the QPS of the query **Q8** while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). 
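To make the New Feature list above more concrete, here is an illustrative query exercising a few of the functions and aliases it introduces (`arrayJaccardIndex`, `hasSubsequence`, `initcap`, `OCTET_LENGTH`, `substring_index`, `array_agg`, `any_value`). The expected results in the comments are assumptions based on the entries' descriptions, not verified output.

```sql
-- Illustrative only; expected results in comments are assumptions.
SELECT
    arrayJaccardIndex([1, 2, 3], [2, 3, 4])        AS jaccard,     -- 0.5: 2 shared of 4 distinct values
    hasSubsequence('garbage', 'arg')               AS subseq,      -- 1: 'a', 'r', 'g' appear in order
    initcap('hello clickhouse world')              AS capitalized, -- 'Hello Clickhouse World'
    OCTET_LENGTH('abc')                            AS octets,      -- 3, alias of length()
    substring_index('www.clickhouse.com', '.', 2)  AS prefix;      -- 'www.clickhouse', MySQL semantics

-- PostgreSQL/BigQuery-style aggregate aliases:
SELECT array_agg(number) AS xs, any_value(number) AS x FROM numbers(5);
```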
+* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in FileCache::loadMetadata. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues: ``` select geohashEncode(120.2, number::Float64) from numbers(10);. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow filtering HTTP headers with `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* #50727 new alias for function current_database and added new function current_schemas. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Log async insert flush queries into to system.query_log. 
[#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). +* Fix checking error caused by uninitialized class members. [#51418](https://github.com/ClickHouse/ClickHouse/pull/51418) ([李扬](https://github.com/taiyang-li)). +* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correcting the message of modify storage policy https://github.com/clickhouse/clickhouse/issues/51516 ### documentation entry for user-facing changes. [#51519](https://github.com/ClickHouse/ClickHouse/pull/51519) ([xiaolei565](https://github.com/xiaolei565)). +* Support `DROP FILESYSTEM CACHE KEY [ OFFSET ]`. [#51547](https://github.com/ClickHouse/ClickHouse/pull/51547) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add column modification time into system.parts_columns. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). 
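A short sketch of three of the improvements listed above: microsecond-precision `age`/`date_diff`, SQL-standard `FETCH` without `OFFSET`, and the new `primary_key_size` column in `system.parts`. The queries are illustrative, not taken from the linked PRs.

```sql
-- age()/date_diff() now accept millisecond/microsecond units.
SELECT age('microsecond',
           toDateTime64('2023-07-01 00:00:00.000000', 6),
           toDateTime64('2023-07-01 00:00:00.000250', 6)) AS us;   -- expected: 250

-- SQL-standard FETCH is accepted without a preceding OFFSET.
SELECT number FROM numbers(100) ORDER BY number FETCH FIRST 5 ROWS ONLY;

-- Compressed primary-key size per part, via the new system.parts column.
SELECT table, name, primary_key_size
FROM system.parts
WHERE active
ORDER BY primary_key_size DESC
LIMIT 5;
```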
+* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for function range of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of statement SHOW INDEX. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* CacheDictionary request only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed settings not applied for explain query when format provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. 
[#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixed crash when mysqlxx::Pool::Entry is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* CREATE TABLE ... AS SELECT .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). +* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Log messages are written to text_log from the beginning. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception will be thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Support async_deduplication_token for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Avro input format support Union with single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). This is defaulted to false until [#52075](https://github.com/ClickHouse/ClickHouse/issues/52075) is fixed. [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. [#52181](https://github.com/ClickHouse/ClickHouse/pull/52181) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). 
[#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now unquoted utf-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093) . [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). +* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52419](https://github.com/ClickHouse/ClickHouse/pull/52419) ([alesapin](https://github.com/alesapin)). +* The `session_timezone` setting is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). +* Added field `refcount` to `system.remote_data_paths` table. [#52518](https://github.com/ClickHouse/ClickHouse/pull/52518) ([Anton Popov](https://github.com/CurtizJ)). +* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed CRC32(WeakHash32) issue for s390x. [#50365](https://github.com/ClickHouse/ClickHouse/pull/50365) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Add integration test check with the enabled analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) ([Dmitry Novik](https://github.com/novikd)). +* Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed several issues found by OSS-Fuzz. [#51736](https://github.com/ClickHouse/ClickHouse/pull/51736) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
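Finally, a sketch covering a few of the compatibility aliases and settings described above (`curdate`/`current_date`/`current_timestamp`, `optimize_use_implicit_projections`, `merge_tree_determine_task_size_by_prewhere_columns`). The setting values chosen are illustrative, not recommendations.

```sql
-- MySQL/standard-SQL style aliases of today() and now().
SELECT curdate(), current_date(), current_timestamp();

-- Implicit min_max_count projections can be toggled per session.
SET optimize_use_implicit_projections = 0;

-- Size read tasks by PREWHERE columns only (new MergeTree-related setting).
SET merge_tree_determine_task_size_by_prewhere_columns = 1;
```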
+* There were a couple of failures because of (?) S3 availability. The sccache has a feature of failing over to local compilation. [#51893](https://github.com/ClickHouse/ClickHouse/pull/51893) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* 02242_delete_user_race and 02243_drop_user_grant_race tests have been corrected. [#51923](https://github.com/ClickHouse/ClickHouse/pull/51923) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow up [#50926](https://github.com/ClickHouse/ClickHouse/issues/50926). Add integration tests check with enabled analyzer to master. [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Improve the startup time of `clickhouse-client` and `clickhouse-local` in debug and sanitizer builds. This closes [#52228](https://github.com/ClickHouse/ClickHouse/issues/52228). [#52489](https://github.com/ClickHouse/ClickHouse/pull/52489) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix materialised pg syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). +* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). 
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). +* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). +* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). 
+* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). +* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add documentation for building in docker"'. [#51773](https://github.com/ClickHouse/ClickHouse/pull/51773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix build"'. [#51911](https://github.com/ClickHouse/ClickHouse/pull/51911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* NO CL ENTRY: 'Revert "Millisecond and microsecond support in date_diff / age functions"'. [#52129](https://github.com/ClickHouse/ClickHouse/pull/52129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Re-add SipHash keyed functions"'. [#52466](https://github.com/ClickHouse/ClickHouse/pull/52466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add an ability to specify allocations size for sampling memory profiler"'. [#52496](https://github.com/ClickHouse/ClickHouse/pull/52496) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Rewrite uniq to count"'. [#52576](https://github.com/ClickHouse/ClickHouse/pull/52576) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove duplicate_order_by_and_distinct optimization [#47135](https://github.com/ClickHouse/ClickHouse/pull/47135) ([Igor Nikonov](https://github.com/devcrafter)). +* Update sort desc in ReadFromMergeTree after applying PREWHERE info [#48669](https://github.com/ClickHouse/ClickHouse/pull/48669) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `BindException: Address already in use` in HDFS integration tests [#49428](https://github.com/ClickHouse/ClickHouse/pull/49428) ([Nikita Taranov](https://github.com/nickitat)). +* Force libunwind usage (removes gcc_eh support) [#49438](https://github.com/ClickHouse/ClickHouse/pull/49438) ([Azat Khuzhin](https://github.com/azat)). +* Cleanup `storage_conf.xml` [#49557](https://github.com/ClickHouse/ClickHouse/pull/49557) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky tests caused by OPTIMIZE FINAL failing memory budget check [#49764](https://github.com/ClickHouse/ClickHouse/pull/49764) ([Michael Kolupaev](https://github.com/al13n321)). +* Remove unstable queries from performance/join_set_filter [#50235](https://github.com/ClickHouse/ClickHouse/pull/50235) ([vdimir](https://github.com/vdimir)). +* More accurate DNS resolve for the keeper connection [#50738](https://github.com/ClickHouse/ClickHouse/pull/50738) ([pufit](https://github.com/pufit)). +* Try to fix some trash in Disks and part moves [#51135](https://github.com/ClickHouse/ClickHouse/pull/51135) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add jemalloc support fro s390x [#51186](https://github.com/ClickHouse/ClickHouse/pull/51186) ([Boris Kuschel](https://github.com/bkuschel)). +* Resubmit [#48821](https://github.com/ClickHouse/ClickHouse/issues/48821) [#51208](https://github.com/ClickHouse/ClickHouse/pull/51208) ([Kseniia Sumarokova](https://github.com/kssenii)). +* test for [#36894](https://github.com/ClickHouse/ClickHouse/issues/36894) [#51274](https://github.com/ClickHouse/ClickHouse/pull/51274) ([Denny Crane](https://github.com/den-crane)). +* external_aggregation_fix for big endian machines [#51280](https://github.com/ClickHouse/ClickHouse/pull/51280) ([Sanjam Panda](https://github.com/saitama951)). +* Fix: Invalid number of rows in Chunk column Object [#51296](https://github.com/ClickHouse/ClickHouse/pull/51296) ([Igor Nikonov](https://github.com/devcrafter)). +* Add a test for [#44816](https://github.com/ClickHouse/ClickHouse/issues/44816) [#51305](https://github.com/ClickHouse/ClickHouse/pull/51305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for `calculate_text_stack_trace` setting [#51311](https://github.com/ClickHouse/ClickHouse/pull/51311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* decrease log level, make logs shorter [#51320](https://github.com/ClickHouse/ClickHouse/pull/51320) ([Sema Checherinda](https://github.com/CheSema)). +* Collect stack traces from job's scheduling and print along with exception's stack trace. [#51349](https://github.com/ClickHouse/ClickHouse/pull/51349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add a test for [#42691](https://github.com/ClickHouse/ClickHouse/issues/42691) [#51352](https://github.com/ClickHouse/ClickHouse/pull/51352) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#32474](https://github.com/ClickHouse/ClickHouse/issues/32474) [#51354](https://github.com/ClickHouse/ClickHouse/pull/51354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#41727](https://github.com/ClickHouse/ClickHouse/issues/41727) [#51355](https://github.com/ClickHouse/ClickHouse/pull/51355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#35801](https://github.com/ClickHouse/ClickHouse/issues/35801) [#51356](https://github.com/ClickHouse/ClickHouse/pull/51356) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34626](https://github.com/ClickHouse/ClickHouse/issues/34626) [#51357](https://github.com/ClickHouse/ClickHouse/pull/51357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Initialize text_log earlier to capture table startup messages [#51360](https://github.com/ClickHouse/ClickHouse/pull/51360) ([Azat Khuzhin](https://github.com/azat)). +* Use separate default settings for clickhouse-local [#51363](https://github.com/ClickHouse/ClickHouse/pull/51363) ([Azat Khuzhin](https://github.com/azat)). +* Attempt to remove wrong code (catch/throw in Functions) [#51367](https://github.com/ClickHouse/ClickHouse/pull/51367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove suspicious code [#51383](https://github.com/ClickHouse/ClickHouse/pull/51383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable hedged requests under TSan [#51392](https://github.com/ClickHouse/ClickHouse/pull/51392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* no finalize in d-tor WriteBufferFromOStream [#51404](https://github.com/ClickHouse/ClickHouse/pull/51404) ([Sema Checherinda](https://github.com/CheSema)). +* Better diagnostics for 01193_metadata_loading [#51414](https://github.com/ClickHouse/ClickHouse/pull/51414) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix attaching gdb in stress tests [#51445](https://github.com/ClickHouse/ClickHouse/pull/51445) ([Kruglov Pavel](https://github.com/Avogar)). +* Merging [#36384](https://github.com/ClickHouse/ClickHouse/issues/36384) [#51458](https://github.com/ClickHouse/ClickHouse/pull/51458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible race on shutdown wait [#51497](https://github.com/ClickHouse/ClickHouse/pull/51497) ([Sergei Trifonov](https://github.com/serxa)). +* Fix `test_alter_moving_garbage`: lock between getActiveContainingPart and swapActivePart in parts mover [#51498](https://github.com/ClickHouse/ClickHouse/pull/51498) ([vdimir](https://github.com/vdimir)). +* Fix a logical error on mutation [#51502](https://github.com/ClickHouse/ClickHouse/pull/51502) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix running integration tests with spaces in it's names [#51514](https://github.com/ClickHouse/ClickHouse/pull/51514) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix flaky test 00417_kill_query [#51522](https://github.com/ClickHouse/ClickHouse/pull/51522) ([Nikolay Degterinsky](https://github.com/evillique)). +* fs cache: add some checks [#51536](https://github.com/ClickHouse/ClickHouse/pull/51536) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests [#51549](https://github.com/ClickHouse/ClickHouse/pull/51549) ([Nikita Taranov](https://github.com/nickitat)). +* 00900_orc_load: lift kill timeout [#51559](https://github.com/ClickHouse/ClickHouse/pull/51559) ([Robert Schulze](https://github.com/rschu1ze)). +* Add retries to 00416_pocopatch_progress_in_http_headers [#51575](https://github.com/ClickHouse/ClickHouse/pull/51575) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix merge_selecting_task scheduling [#51591](https://github.com/ClickHouse/ClickHouse/pull/51591) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add hex functions for cityhash [#51595](https://github.com/ClickHouse/ClickHouse/pull/51595) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove `unset CLICKHOUSE_LOG_COMMENT` from tests [#51623](https://github.com/ClickHouse/ClickHouse/pull/51623) ([Nikita Taranov](https://github.com/nickitat)). +* Implement endianness-independent serialization [#51637](https://github.com/ClickHouse/ClickHouse/pull/51637) ([ltrk2](https://github.com/ltrk2)). +* Ignore APPEND and TRUNCATE modifiers if file does not exist. [#51640](https://github.com/ClickHouse/ClickHouse/pull/51640) ([alekar](https://github.com/alekar)). +* Try to fix flaky 02210_processors_profile_log [#51641](https://github.com/ClickHouse/ClickHouse/pull/51641) ([Igor Nikonov](https://github.com/devcrafter)). +* Make common macros extendable [#51646](https://github.com/ClickHouse/ClickHouse/pull/51646) ([Amos Bird](https://github.com/amosbird)). +* Correct an exception message in src/Functions/nested.cpp [#51651](https://github.com/ClickHouse/ClickHouse/pull/51651) ([Alex Cheng](https://github.com/Alex-Cheng)). +* tests: fix 02050_client_profile_events flakiness [#51653](https://github.com/ClickHouse/ClickHouse/pull/51653) ([Azat Khuzhin](https://github.com/azat)). +* Minor follow-up to re2 update to 2023-06-02 ([#50949](https://github.com/ClickHouse/ClickHouse/issues/50949)) [#51655](https://github.com/ClickHouse/ClickHouse/pull/51655) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Update timeouts in tests for transactions [#51683](https://github.com/ClickHouse/ClickHouse/pull/51683) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove unused code [#51684](https://github.com/ClickHouse/ClickHouse/pull/51684) ([Sergei Trifonov](https://github.com/serxa)). +* Remove `mmap/mremap/munmap` from Allocator.h [#51686](https://github.com/ClickHouse/ClickHouse/pull/51686) ([alesapin](https://github.com/alesapin)). +* SonarCloud: Add C++23 Experimental Flag [#51687](https://github.com/ClickHouse/ClickHouse/pull/51687) ([Julio Jimenez](https://github.com/juliojimenez)). +* Wait with retries when attaching GDB in tests [#51688](https://github.com/ClickHouse/ClickHouse/pull/51688) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Update version_date.tsv and changelogs after v23.6.1.1524-stable [#51691](https://github.com/ClickHouse/ClickHouse/pull/51691) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* fix write to finalized buffer [#51696](https://github.com/ClickHouse/ClickHouse/pull/51696) ([Sema Checherinda](https://github.com/CheSema)). +* do not log exception aborted for pending mutate/merge entries when shutdown [#51697](https://github.com/ClickHouse/ClickHouse/pull/51697) ([Sema Checherinda](https://github.com/CheSema)). +* Fix race in ContextAccess [#51704](https://github.com/ClickHouse/ClickHouse/pull/51704) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make test scripts backwards compatible [#51707](https://github.com/ClickHouse/ClickHouse/pull/51707) ([Antonio Andelic](https://github.com/antonio2368)). +* test for full join and null predicate [#51709](https://github.com/ClickHouse/ClickHouse/pull/51709) ([Denny Crane](https://github.com/den-crane)). +* A cmake warning on job limits underutilizing CPU [#51710](https://github.com/ClickHouse/ClickHouse/pull/51710) ([velavokr](https://github.com/velavokr)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Added ASK_PASSWORD client constant instead of hardcoded '\n' [#51723](https://github.com/ClickHouse/ClickHouse/pull/51723) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Update README.md [#51726](https://github.com/ClickHouse/ClickHouse/pull/51726) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove MemoryPool from Poco because it's useless [#51732](https://github.com/ClickHouse/ClickHouse/pull/51732) ([alesapin](https://github.com/alesapin)). +* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)). +* Update 01320_create_sync_race_condition_zookeeper.sh [#51742](https://github.com/ClickHouse/ClickHouse/pull/51742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Fix: Invalid number of rows in Chunk column Object" [#51750](https://github.com/ClickHouse/ClickHouse/pull/51750) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add SonarCloud to README [#51751](https://github.com/ClickHouse/ClickHouse/pull/51751) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test `02789_object_type_invalid_num_of_rows` [#51754](https://github.com/ClickHouse/ClickHouse/pull/51754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix (benign) data race in `transform` [#51755](https://github.com/ClickHouse/ClickHouse/pull/51755) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky KeeperMap test [#51764](https://github.com/ClickHouse/ClickHouse/pull/51764) ([Antonio Andelic](https://github.com/antonio2368)). +* Version mypy=1.4.1 falsly reports unused ignore comment [#51769](https://github.com/ClickHouse/ClickHouse/pull/51769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid keeping lock Context::getLock() while calculating access rights [#51772](https://github.com/ClickHouse/ClickHouse/pull/51772) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Making stateless tests with timeout less flaky [#51774](https://github.com/ClickHouse/ClickHouse/pull/51774) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix after [#51000](https://github.com/ClickHouse/ClickHouse/issues/51000) [#51790](https://github.com/ClickHouse/ClickHouse/pull/51790) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add assert in ThreadStatus destructor for correct current_thread [#51800](https://github.com/ClickHouse/ClickHouse/pull/51800) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix broken parts handling in `ReplicatedMergeTree` [#51801](https://github.com/ClickHouse/ClickHouse/pull/51801) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix tsan signal-unsafe call [#51802](https://github.com/ClickHouse/ClickHouse/pull/51802) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix for parallel replicas not completely disabled by granule count threshold [#51805](https://github.com/ClickHouse/ClickHouse/pull/51805) ([Alexander Gololobov](https://github.com/davenger)). +* Make sure that we don't attempt to serialize/deserialize block with 0 columns and non-zero rows [#51807](https://github.com/ClickHouse/ClickHouse/pull/51807) ([Alexander Gololobov](https://github.com/davenger)). +* Fix rare bug in `DROP COLUMN` and enabled sparse columns [#51809](https://github.com/ClickHouse/ClickHouse/pull/51809) ([Anton Popov](https://github.com/CurtizJ)). +* Fix flaky `test_multiple_disks` [#51821](https://github.com/ClickHouse/ClickHouse/pull/51821) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow up to [#51547](https://github.com/ClickHouse/ClickHouse/issues/51547) [#51822](https://github.com/ClickHouse/ClickHouse/pull/51822) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly grep archives in stress tests [#51824](https://github.com/ClickHouse/ClickHouse/pull/51824) ([Antonio Andelic](https://github.com/antonio2368)). +* Update analyzer_tech_debt.txt [#51836](https://github.com/ClickHouse/ClickHouse/pull/51836) ([Alexander Tokmakov](https://github.com/tavplubix)). +* remove unused code [#51837](https://github.com/ClickHouse/ClickHouse/pull/51837) ([flynn](https://github.com/ucasfl)). +* Fix disk config for upgrade tests [#51839](https://github.com/ClickHouse/ClickHouse/pull/51839) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove Coverity from workflows, but leave in the code [#51842](https://github.com/ClickHouse/ClickHouse/pull/51842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Many fixes [3] [#51848](https://github.com/ClickHouse/ClickHouse/pull/51848) ([Ilya Yatsishin](https://github.com/qoega)). +* Change misleading name in joins: addJoinedBlock -> addBlockToJoin [#51852](https://github.com/ClickHouse/ClickHouse/pull/51852) ([Igor Nikonov](https://github.com/devcrafter)). +* fix: correct exception messages on policies comparison [#51854](https://github.com/ClickHouse/ClickHouse/pull/51854) ([Feng Kaiyu](https://github.com/fky2015)). +* Update 02439_merge_selecting_partitions.sql [#51862](https://github.com/ClickHouse/ClickHouse/pull/51862) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove useless packages [#51863](https://github.com/ClickHouse/ClickHouse/pull/51863) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless logs [#51865](https://github.com/ClickHouse/ClickHouse/pull/51865) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix incorrect log level = warning [#51867](https://github.com/ClickHouse/ClickHouse/pull/51867) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_replicated_table_attach [#51868](https://github.com/ClickHouse/ClickHouse/pull/51868) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better usability of a test [#51869](https://github.com/ClickHouse/ClickHouse/pull/51869) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless code [#51873](https://github.com/ClickHouse/ClickHouse/pull/51873) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix upgrade check script [#51878](https://github.com/ClickHouse/ClickHouse/pull/51878) ([Antonio Andelic](https://github.com/antonio2368)). +* Sqlloogic improvements [#51883](https://github.com/ClickHouse/ClickHouse/pull/51883) ([Ilya Yatsishin](https://github.com/qoega)). +* Disable ThinLTO on non-Linux [#51897](https://github.com/ClickHouse/ClickHouse/pull/51897) ([Robert Schulze](https://github.com/rschu1ze)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix build [#51909](https://github.com/ClickHouse/ClickHouse/pull/51909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build [#51910](https://github.com/ClickHouse/ClickHouse/pull/51910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `00175_partition_by_ignore` and move it to correct location [#51913](https://github.com/ClickHouse/ClickHouse/pull/51913) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool [#51914](https://github.com/ClickHouse/ClickHouse/pull/51914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe better tests [#51916](https://github.com/ClickHouse/ClickHouse/pull/51916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert system drop filesystem cache by key [#51917](https://github.com/ClickHouse/ClickHouse/pull/51917) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `detach_attach_partition_race` [#51920](https://github.com/ClickHouse/ClickHouse/pull/51920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix for `02481_async_insert_race_long` [#51925](https://github.com/ClickHouse/ClickHouse/pull/51925) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault caused by `ThreadStatus` [#51931](https://github.com/ClickHouse/ClickHouse/pull/51931) ([Antonio Andelic](https://github.com/antonio2368)). +* Print short fault info only from safe fields [#51932](https://github.com/ClickHouse/ClickHouse/pull/51932) ([Alexander Gololobov](https://github.com/davenger)). +* Fix typo in integration tests [#51944](https://github.com/ClickHouse/ClickHouse/pull/51944) ([Ilya Yatsishin](https://github.com/qoega)). +* Better logs on shutdown [#51951](https://github.com/ClickHouse/ClickHouse/pull/51951) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Filter databases list before querying potentially slow fields [#51955](https://github.com/ClickHouse/ClickHouse/pull/51955) ([Alexander Gololobov](https://github.com/davenger)). +* Fix some issues with transactions [#51959](https://github.com/ClickHouse/ClickHouse/pull/51959) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fix unrelated messages from LSan in clickhouse-client [#51966](https://github.com/ClickHouse/ClickHouse/pull/51966) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow OOM in AST Fuzzer with Sanitizers [#51967](https://github.com/ClickHouse/ClickHouse/pull/51967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable one test under Analyzer [#51968](https://github.com/ClickHouse/ClickHouse/pull/51968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Docker [#51969](https://github.com/ClickHouse/ClickHouse/pull/51969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01825_type_json_from_map` [#51970](https://github.com/ClickHouse/ClickHouse/pull/51970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02354_distributed_with_external_aggregation_memory_usage` [#51971](https://github.com/ClickHouse/ClickHouse/pull/51971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix disaster in integration tests, part 2 [#51973](https://github.com/ClickHouse/ClickHouse/pull/51973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Cleanup remote_servers in dist config.xml [#51985](https://github.com/ClickHouse/ClickHouse/pull/51985) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.6.2.18-stable [#51986](https://github.com/ClickHouse/ClickHouse/pull/51986) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.20.11-lts [#51987](https://github.com/ClickHouse/ClickHouse/pull/51987) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix performance test for regexp cache [#51988](https://github.com/ClickHouse/ClickHouse/pull/51988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Move a test to the right place [#51989](https://github.com/ClickHouse/ClickHouse/pull/51989) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a check to validate that the stateful tests are stateful [#51990](https://github.com/ClickHouse/ClickHouse/pull/51990) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that functional tests cleanup their tables [#51991](https://github.com/ClickHouse/ClickHouse/pull/51991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_extreme_deduplication [#51992](https://github.com/ClickHouse/ClickHouse/pull/51992) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup SymbolIndex after reload got removed [#51993](https://github.com/ClickHouse/ClickHouse/pull/51993) ([Azat Khuzhin](https://github.com/azat)). +* Update CompletedPipelineExecutor exception log name [#52028](https://github.com/ClickHouse/ClickHouse/pull/52028) ([xiao](https://github.com/nicelulu)). +* Fix `00502_custom_partitioning_replicated_zookeeper_long` [#52032](https://github.com/ClickHouse/ClickHouse/pull/52032) ([Antonio Andelic](https://github.com/antonio2368)). +* Prohibit send_metadata for s3_plain disks [#52038](https://github.com/ClickHouse/ClickHouse/pull/52038) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.4.6.25-stable [#52061](https://github.com/ClickHouse/ClickHouse/pull/52061) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Preparations for Trivial Support For Resharding (part1) [#52068](https://github.com/ClickHouse/ClickHouse/pull/52068) ([Azat Khuzhin](https://github.com/azat)). 
+* Update version_date.tsv and changelogs after v23.3.8.21-lts [#52077](https://github.com/ClickHouse/ClickHouse/pull/52077) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flakiness of test_keeper_s3_snapshot flakiness [#52083](https://github.com/ClickHouse/ClickHouse/pull/52083) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_extreme_deduplication flakiness [#52085](https://github.com/ClickHouse/ClickHouse/pull/52085) ([Azat Khuzhin](https://github.com/azat)). +* Small docs update for toYearWeek() function [#52090](https://github.com/ClickHouse/ClickHouse/pull/52090) ([Andrey Zvonov](https://github.com/zvonand)). +* Small docs update for DateTime, DateTime64 [#52094](https://github.com/ClickHouse/ClickHouse/pull/52094) ([Andrey Zvonov](https://github.com/zvonand)). +* Add missing --force for docker network prune (otherwise it is noop on CI) [#52095](https://github.com/ClickHouse/ClickHouse/pull/52095) ([Azat Khuzhin](https://github.com/azat)). +* tests: drop existing view in test_materialized_mysql_database [#52103](https://github.com/ClickHouse/ClickHouse/pull/52103) ([Azat Khuzhin](https://github.com/azat)). +* Update README.md [#52115](https://github.com/ClickHouse/ClickHouse/pull/52115) ([Tyler Hannan](https://github.com/tylerhannan)). +* Print Zxid in keeper stat command in hex (so as ZooKeeper) [#52122](https://github.com/ClickHouse/ClickHouse/pull/52122) ([Azat Khuzhin](https://github.com/azat)). +* Skip protection from double decompression if inode from maps cannot be obtained [#52138](https://github.com/ClickHouse/ClickHouse/pull/52138) ([Azat Khuzhin](https://github.com/azat)). +* There is no point in detecting flaky tests [#52142](https://github.com/ClickHouse/ClickHouse/pull/52142) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove default argument value [#52143](https://github.com/ClickHouse/ClickHouse/pull/52143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the "kill_mutation" test [#52144](https://github.com/ClickHouse/ClickHouse/pull/52144) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ORDER BY tuple of WINDOW functions (and slightly more changes) [#52146](https://github.com/ClickHouse/ClickHouse/pull/52146) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible EADDRINUSE ("Address already in use") in integration tests [#52148](https://github.com/ClickHouse/ClickHouse/pull/52148) ([Azat Khuzhin](https://github.com/azat)). +* Fix test 02497_storage_file_reader_selection [#52154](https://github.com/ClickHouse/ClickHouse/pull/52154) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected AST Set [#52158](https://github.com/ClickHouse/ClickHouse/pull/52158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix slow test `02317_distinct_in_order_optimization` [#52173](https://github.com/ClickHouse/ClickHouse/pull/52173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add comments for https://github.com/ClickHouse/ClickHouse/pull/52112 [#52175](https://github.com/ClickHouse/ClickHouse/pull/52175) ([李扬](https://github.com/taiyang-li)). +* Randomize timezone in tests across non-deterministic around 1970 and default [#52184](https://github.com/ClickHouse/ClickHouse/pull/52184) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix `test_multiple_disks/test.py::test_start_stop_moves` [#52189](https://github.com/ClickHouse/ClickHouse/pull/52189) ([Antonio Andelic](https://github.com/antonio2368)). +* CMake: Simplify job limiting [#52196](https://github.com/ClickHouse/ClickHouse/pull/52196) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix self extracting binaries under qemu linux-user (qemu-$ARCH-static) [#52198](https://github.com/ClickHouse/ClickHouse/pull/52198) ([Azat Khuzhin](https://github.com/azat)). +* Fix `Integration tests flaky check (asan)` [#52201](https://github.com/ClickHouse/ClickHouse/pull/52201) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test test_lost_part [#52202](https://github.com/ClickHouse/ClickHouse/pull/52202) ([alesapin](https://github.com/alesapin)). +* MaterializedMySQL: Replace to_string by magic_enum::enum_name [#52204](https://github.com/ClickHouse/ClickHouse/pull/52204) ([Val Doroshchuk](https://github.com/valbok)). +* MaterializedMySQL: Add tests to parse db and table names from DDL [#52208](https://github.com/ClickHouse/ClickHouse/pull/52208) ([Val Doroshchuk](https://github.com/valbok)). +* Revert "Fixed several issues found by OSS-Fuzz" [#52216](https://github.com/ClickHouse/ClickHouse/pull/52216) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use one copy replication more agressively [#52218](https://github.com/ClickHouse/ClickHouse/pull/52218) ([alesapin](https://github.com/alesapin)). +* Fix flaky test `01076_parallel_alter_replicated_zookeeper` [#52221](https://github.com/ClickHouse/ClickHouse/pull/52221) ([alesapin](https://github.com/alesapin)). +* Fix 01889_key_condition_function_chains for analyzer. [#52223](https://github.com/ClickHouse/ClickHouse/pull/52223) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Inhibit settings randomization in the test `json_ghdata` [#52226](https://github.com/ClickHouse/ClickHouse/pull/52226) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better diagnostics in a test [#52227](https://github.com/ClickHouse/ClickHouse/pull/52227) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable no-upgrade-check for 02273_full_sort_join [#52235](https://github.com/ClickHouse/ClickHouse/pull/52235) ([vdimir](https://github.com/vdimir)). +* Fix network manager for integration tests [#52237](https://github.com/ClickHouse/ClickHouse/pull/52237) ([Azat Khuzhin](https://github.com/azat)). +* List replication queue only for current test database [#52238](https://github.com/ClickHouse/ClickHouse/pull/52238) ([Alexander Gololobov](https://github.com/davenger)). +* Attempt to fix assert in tsan with fibers [#52241](https://github.com/ClickHouse/ClickHouse/pull/52241) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix undefined behaviour in fuzzer [#52256](https://github.com/ClickHouse/ClickHouse/pull/52256) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow-up to [#51959](https://github.com/ClickHouse/ClickHouse/issues/51959) [#52261](https://github.com/ClickHouse/ClickHouse/pull/52261) ([Alexander Tokmakov](https://github.com/tavplubix)). +* More fair queue for `drop table sync` [#52276](https://github.com/ClickHouse/ClickHouse/pull/52276) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `02497_trace_events_stress_long` [#52279](https://github.com/ClickHouse/ClickHouse/pull/52279) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix test `01111_create_drop_replicated_db_stress` [#52283](https://github.com/ClickHouse/ClickHouse/pull/52283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ugly code [#52284](https://github.com/ClickHouse/ClickHouse/pull/52284) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add missing replica syncs in test_backup_restore_on_cluster [#52306](https://github.com/ClickHouse/ClickHouse/pull/52306) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix test_replicated_database 'node doesn't exist' flakiness [#52307](https://github.com/ClickHouse/ClickHouse/pull/52307) ([Michael Kolupaev](https://github.com/al13n321)). +* Minor: Update description of events "QueryCacheHits/Misses" [#52309](https://github.com/ClickHouse/ClickHouse/pull/52309) ([Robert Schulze](https://github.com/rschu1ze)). +* Beautify pretty-printing of the query string in SYSTEM.QUERY_CACHE [#52312](https://github.com/ClickHouse/ClickHouse/pull/52312) ([Robert Schulze](https://github.com/rschu1ze)). +* Reduce dependencies for skim by avoid using default features [#52316](https://github.com/ClickHouse/ClickHouse/pull/52316) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02725_memory-for-merges [#52317](https://github.com/ClickHouse/ClickHouse/pull/52317) ([alesapin](https://github.com/alesapin)). +* Skip unsupported disks in Keeper [#52321](https://github.com/ClickHouse/ClickHouse/pull/52321) ([Antonio Andelic](https://github.com/antonio2368)). +* Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed" [#52322](https://github.com/ClickHouse/ClickHouse/pull/52322) ([Kruglov Pavel](https://github.com/Avogar)). +* Resubmit [#51716](https://github.com/ClickHouse/ClickHouse/issues/51716) [#52323](https://github.com/ClickHouse/ClickHouse/pull/52323) ([Kruglov Pavel](https://github.com/Avogar)). +* Add logging about all found workflows for merge_pr.py [#52324](https://github.com/ClickHouse/ClickHouse/pull/52324) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Minor: Less awkward IAST::FormatSettings [#52332](https://github.com/ClickHouse/ClickHouse/pull/52332) ([Robert Schulze](https://github.com/rschu1ze)). +* Mark test 02125_many_mutations_2 as no-parallel to avoid flakiness [#52338](https://github.com/ClickHouse/ClickHouse/pull/52338) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix capabilities installed via systemd service (fixes netlink/IO priorities) [#52357](https://github.com/ClickHouse/ClickHouse/pull/52357) ([Azat Khuzhin](https://github.com/azat)). +* Update 01606_git_import.sh [#52360](https://github.com/ClickHouse/ClickHouse/pull/52360) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update ci-slack-bot.py [#52372](https://github.com/ClickHouse/ClickHouse/pull/52372) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `test_keeper_session` [#52373](https://github.com/ClickHouse/ClickHouse/pull/52373) ([Antonio Andelic](https://github.com/antonio2368)). +* Update ci-slack-bot.py [#52374](https://github.com/ClickHouse/ClickHouse/pull/52374) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable analyzer setting in backward_compatibility integration tests. [#52375](https://github.com/ClickHouse/ClickHouse/pull/52375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* New metric - Filesystem cache size limit [#52378](https://github.com/ClickHouse/ClickHouse/pull/52378) ([Krzysztof Góralski](https://github.com/kgoralski)). 
+* Fix `test_replicated_merge_tree_encrypted_disk ` [#52379](https://github.com/ClickHouse/ClickHouse/pull/52379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `02122_parallel_formatting_XML ` [#52380](https://github.com/ClickHouse/ClickHouse/pull/52380) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Follow up to [#49698](https://github.com/ClickHouse/ClickHouse/issues/49698) [#52381](https://github.com/ClickHouse/ClickHouse/pull/52381) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Rename TaskStatsInfoGetter into NetlinkMetricsProvider [#52392](https://github.com/ClickHouse/ClickHouse/pull/52392) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_keeper_force_recovery` [#52408](https://github.com/ClickHouse/ClickHouse/pull/52408) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky gtest_lru_file_cache.cpp [#52418](https://github.com/ClickHouse/ClickHouse/pull/52418) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix: remove redundant distinct with views [#52438](https://github.com/ClickHouse/ClickHouse/pull/52438) ([Igor Nikonov](https://github.com/devcrafter)). +* Add 02815_range_dict_no_direct_join to analyzer_tech_debt.txt [#52464](https://github.com/ClickHouse/ClickHouse/pull/52464) ([vdimir](https://github.com/vdimir)). +* do not throw exception in OptimizedRegularExpressionImpl::analyze [#52467](https://github.com/ClickHouse/ClickHouse/pull/52467) ([Han Fei](https://github.com/hanfei1991)). +* Remove skip_startup_tables from IDatabase::loadStoredObjects() [#52491](https://github.com/ClickHouse/ClickHouse/pull/52491) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_insert_same_partition_and_merge by increasing wait time [#52497](https://github.com/ClickHouse/ClickHouse/pull/52497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Try to fix asan wanring in HashJoin [#52499](https://github.com/ClickHouse/ClickHouse/pull/52499) ([Igor Nikonov](https://github.com/devcrafter)). +* Replace with three way comparison [#52509](https://github.com/ClickHouse/ClickHouse/pull/52509) ([flynn](https://github.com/ucasfl)). +* Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop [#52514](https://github.com/ClickHouse/ClickHouse/pull/52514) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_throttling` [#52515](https://github.com/ClickHouse/ClickHouse/pull/52515) ([Antonio Andelic](https://github.com/antonio2368)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `toDecimalString` function [#52520](https://github.com/ClickHouse/ClickHouse/pull/52520) ([Andrey Zvonov](https://github.com/zvonand)). +* Remove unused code [#52527](https://github.com/ClickHouse/ClickHouse/pull/52527) ([Raúl Marín](https://github.com/Algunenano)). +* Cancel execution in PipelineExecutor in case of exception in graph->updateNode [#52533](https://github.com/ClickHouse/ClickHouse/pull/52533) ([Kruglov Pavel](https://github.com/Avogar)). +* Make 01951_distributed_push_down_limit analyzer agnostic [#52534](https://github.com/ClickHouse/ClickHouse/pull/52534) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Fix disallow_concurrency test for backup and restore [#52536](https://github.com/ClickHouse/ClickHouse/pull/52536) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update 02136_scalar_subquery_metrics.sql [#52537](https://github.com/ClickHouse/ClickHouse/pull/52537) ([Alexander Tokmakov](https://github.com/tavplubix)). +* tests: fix 01035_avg_weighted_long flakiness [#52556](https://github.com/ClickHouse/ClickHouse/pull/52556) ([Azat Khuzhin](https://github.com/azat)). +* tests: increase throttling for 01923_network_receive_time_metric_insert [#52557](https://github.com/ClickHouse/ClickHouse/pull/52557) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 00719_parallel_ddl_table flakiness in debug builds [#52558](https://github.com/ClickHouse/ClickHouse/pull/52558) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 01821_join_table_race_long flakiness [#52559](https://github.com/ClickHouse/ClickHouse/pull/52559) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky `00995_exception_while_insert` [#52568](https://github.com/ClickHouse/ClickHouse/pull/52568) ([Antonio Andelic](https://github.com/antonio2368)). +* MaterializedMySQL: Fix typos in tests [#52575](https://github.com/ClickHouse/ClickHouse/pull/52575) ([Val Doroshchuk](https://github.com/valbok)). +* Fix `02497_trace_events_stress_long` again [#52587](https://github.com/ClickHouse/ClickHouse/pull/52587) ([Antonio Andelic](https://github.com/antonio2368)). +* Revert "Remove `mmap/mremap/munmap` from Allocator.h" [#52589](https://github.com/ClickHouse/ClickHouse/pull/52589) ([Nikita Taranov](https://github.com/nickitat)). +* Remove peak memory usage from the final message in the client [#52598](https://github.com/ClickHouse/ClickHouse/pull/52598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* GinIndexStore: fix a bug when files are finalizated after first write, [#52602](https://github.com/ClickHouse/ClickHouse/pull/52602) ([Sema Checherinda](https://github.com/CheSema)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix build with clang-15 [#52627](https://github.com/ClickHouse/ClickHouse/pull/52627) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix style [#52647](https://github.com/ClickHouse/ClickHouse/pull/52647) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix logging level of a noisy message [#52648](https://github.com/ClickHouse/ClickHouse/pull/52648) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Added field `refcount` to `system.remote_data_paths` table" [#52657](https://github.com/ClickHouse/ClickHouse/pull/52657) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.7.2.25-stable.md b/docs/changelogs/v23.7.2.25-stable.md new file mode 100644 index 00000000000..267083d8e03 --- /dev/null +++ b/docs/changelogs/v23.7.2.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.2.25-stable (8dd1107b032) FIXME as compared to v23.7.1.2470-stable (a70127baecc) + +#### Backward Incompatible Change +* Backported in [#52850](https://github.com/ClickHouse/ClickHouse/issues/52850): If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_(...)`, which is no longer supported. 
[#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52913](https://github.com/ClickHouse/ClickHouse/issues/52913): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.7.3.14-stable.md b/docs/changelogs/v23.7.3.14-stable.md new file mode 100644 index 00000000000..dbe76bd19e7 --- /dev/null +++ b/docs/changelogs/v23.7.3.14-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.3.14-stable (bd9a510550c) FIXME as compared to v23.7.2.25-stable (8dd1107b032) + +#### Build/Testing/Packaging Improvement +* Backported in [#53025](https://github.com/ClickHouse/ClickHouse/issues/53025): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). 
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Suspicious DISTINCT crashes from sqlancer [#52636](https://github.com/ClickHouse/ClickHouse/pull/52636) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix Parquet stats for Float32 and Float64 [#53067](https://github.com/ClickHouse/ClickHouse/pull/53067) ([Michael Kolupaev](https://github.com/al13n321)). + diff --git a/docs/changelogs/v23.7.4.5-stable.md b/docs/changelogs/v23.7.4.5-stable.md new file mode 100644 index 00000000000..c7926d79bde --- /dev/null +++ b/docs/changelogs/v23.7.4.5-stable.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.4.5-stable (bd2fcd44553) FIXME as compared to v23.7.3.14-stable (bd9a510550c) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Revert changes in `ZstdDeflatingAppendableWriteBuffer` [#53111](https://github.com/ClickHouse/ClickHouse/pull/53111) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v23.7.5.30-stable.md b/docs/changelogs/v23.7.5.30-stable.md new file mode 100644 index 00000000000..78bef9fb489 --- /dev/null +++ b/docs/changelogs/v23.7.5.30-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.5.30-stable (e86c21fb922) FIXME as compared to v23.7.4.5-stable (bd2fcd44553) + +#### Build/Testing/Packaging Improvement +* Backported in [#53291](https://github.com/ClickHouse/ClickHouse/issues/53291): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53467](https://github.com/ClickHouse/ClickHouse/issues/53467): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md new file mode 100644 index 00000000000..e3e0e4f0344 --- /dev/null +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -0,0 +1,591 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.8.1.2992-lts (ebc7d9a9f3b) FIXME as compared to v23.7.1.2470-stable (a70127baecc) + +#### Backward Incompatible Change +* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` more than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Changed zookeeper paths for storage `S3Queue` metadata. [#54137](https://github.com/ClickHouse/ClickHouse/pull/54137) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### New Feature +* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. 
This column contains the address of an allocation. Added the function `flameGraph`, which can build a flame graph of allocated and not-yet-released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` using the `count_distinct_implementation` setting. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)).
+* Add new table engine `S3Queue` for streaming data import from S3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)).
+* Added SevenZipArchiveReader and TarArchiveReader, with support for reading from archives via the table function `file('path_to_archive :: filename')`, plus functional and unit tests. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)).
+* Added the azureBlobStorageCluster table function. The supported set of features is very similar to the S3Cluster table function. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Allow using cluster, clusterAllReplicas, remote, remoteRaw and remoteSecure without a table name (issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808)). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)).
+* Added a system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)).
+* Added the max_sessions_for_user setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* ClickHouse had no functions to convert a UTC timestamp to a timestamp in another timezone and back, unlike Spark. Added the functions `toUTCTimestamp`/`fromUTCTimestamp`, which act the same as Spark's `to_utc_timestamp`/`from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert a ClickHouse table structure to a CapnProto/Protobuf format schema. Allow input/output of data in CapnProto/Protobuf format without an external format schema, using a schema autogenerated from the table structure (controlled by the settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow exporting the autogenerated schema during input/output using the setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
+* A new field "query_cache_usage" in SYSTEM.QUERY_LOG now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)).
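+
+A minimal usage sketch for `startsWithUTF8`/`endsWithUTF8` (the string literals are arbitrary examples; unlike `startsWith`/`endsWith`, the comparison respects UTF-8 code point boundaries rather than raw bytes):
+
+```sql
+-- Both expressions are expected to return 1 for these inputs
+SELECT
+    startsWithUTF8('北京欢迎你', '北京') AS has_prefix,
+    endsWithUTF8('北京欢迎你', '你') AS has_suffix;
+```
+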
+* Allow a variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, and make schema inference work with a variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Allow executing constant non-deterministic functions in mutations on the initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)).
+* Add input format `One` that doesn't read any data and always returns a single row with a column `dummy` of type `UInt8` and value `0`, like `system.one`. It can be used together with the `_file`/`_path` virtual columns to list files in the file/s3/url/hdfs/etc table functions without reading any data (see the sketch after this list). [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add the tupleConcat function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Support the `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add the max_threads_for_indexes setting to limit the number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)).
+* Add experimental support for HNSW as an approximate nearest neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)).
+* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight` ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755), [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)). [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Add column `name` to `system.clusters` as an alias to `cluster`. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)).
+* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add support for plural units. [#53641](https://github.com/ClickHouse/ClickHouse/pull/53641) ([irenjj](https://github.com/irenjj)).
+* Support the function `isNotDistinctFrom` in the JOIN ON section for null-safe comparison, ref [#53061](https://github.com/ClickHouse/ClickHouse/issues/53061). [#53755](https://github.com/ClickHouse/ClickHouse/pull/53755) ([vdimir](https://github.com/vdimir)).
+* Added the "hide_in_preprocessed" attribute to ClickHouse's server configuration XML dialect. This is a mechanism to hide certain settings from appearing in preprocessed server configuration files. Useful e.g. for passwords or private keys that should not appear verbatim in files. [#53818](https://github.com/ClickHouse/ClickHouse/pull/53818) ([Roman Vasin](https://github.com/rvasin)).
+* Added the server setting validate_tcp_client_information, which determines whether validation of client information is enabled when a query packet is received. [#53907](https://github.com/ClickHouse/ClickHouse/pull/53907) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
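+
+As an illustration of the `One` input format above, a minimal sketch of listing files through virtual columns without reading their contents (the glob path is hypothetical):
+
+```sql
+-- One row per matching file; the files themselves are not parsed
+SELECT _file, _path
+FROM file('data/part-*.parquet', 'One');
+```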
[#53818](https://github.com/ClickHouse/ClickHouse/pull/53818) ([Roman Vasin](https://github.com/rvasin)). +* Added server setting `validate_tcp_client_information` that determines whether validation of client information is enabled when a query packet is received. [#53907](https://github.com/ClickHouse/ClickHouse/pull/53907) ([Alexey Gerasimchuck](https://github.com/Demilivor)). + +#### Performance Improvement +* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISCV. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)). +* Provide a method to deal with all the hash sets in parallel before the merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize aggregation performance of a nullable string key when using `AggregationMethodSerialized`. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)). +* The performance experiments of **SSB** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **8.5%** to the **geomean QPS** when the experimental analyzer is enabled. The details are shown below: ![image](https://github.com/ClickHouse/ClickHouse/assets/26588299/4e58bf8b-d276-408d-ad45-38c82d3cb918). [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Parquet filter pushdown, i.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)). +* Optimize the merge if all hash sets are single-level in `UniqExactSet`. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)). +* StorageJoin: do not create a clone hash join with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)). +* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement a native ORC input format without Arrow to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)). +* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)). +* Better utilization of the thread pool for BACKUPs & RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove a useless and slow on-client performance check.
[#53695](https://github.com/ClickHouse/ClickHouse/pull/53695) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Bloom filter indices are pruned so that they correlate with the cardinality of the data set they are tracking. [#35102](https://github.com/ClickHouse/ClickHouse/pull/35102) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when an external command's stderr has data. This helps make debugging external commands easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)). +* See https://github.com/ClickHouse/ClickHouse/issues/48720. @kgoralski helped with some thoughts about the `system.merges` part. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)). +* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)). +* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added suggestions for mistyped names of databases and tables, with different scenarios commented. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Reading small files from HDFS via Gluten took more time than querying them directly via Spark; this case is improved. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)). +* Reduce the number of worthless error logs after session expiration. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)). +* Introduce fallback ZooKeeper sessions which are time-bound. Fixed the `index` column in system.zookeeper_connection for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Add the ability to log when `max_partitions_per_insert_block` is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). +* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)). +* Updated the check for `connection_string`, since a connection string with SAS does not always begin with `DefaultEndPoint`, and updated the connection URL to include the SAS token after adding the container to the URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix the description for filtering sets in the full_sorting_merge join. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). +* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime. [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). +* Add `SYSTEM SYNC FILESYSTEM CACHE` command.
It will compare the in-memory state of the filesystem cache with what it has on disk and fix the in-memory state if needed. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Attempt to create a generic proxy resolver for ClickHouse while keeping backwards compatibility with the existing S3 storage configuration proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)). +* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). +* Function `arrayIntersect` now returns the values sorted like the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add new queries which allow to create/drop access entities in a specified access storage, or move access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). +* `ALTER TABLE FREEZE` is no longer replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). +* Added the possibility to flush logs to disk on crash; added logs buffer configuration. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix the S3 table function not working with pre-signed URLs. Closes [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). +* The `system.events` and `system.metrics` tables now add column `name` as an alias to `event` and `metric`. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). +* Added support of the syntax `CREATE UNIQUE INDEX` in the parser for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique=1` to ignore the UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). +* Add support of predefined macros (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). +* Disable updating the filesystem cache during backup/restore. The filesystem cache must not be updated during backup/restore; it seems it just slows down the process without any profit (because the BACKUP command can read a lot of data and it's no use to put all the data to the filesystem cache and immediately evict it). [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). +* Updated the parameterized view implementation to create a new StorageView with substituted parameters for every SELECT query of a parameterized view. [#52569](https://github.com/ClickHouse/ClickHouse/pull/52569) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The configuration of the S3 endpoint now allows using it from the root, and '/' is appended automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809).
[#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)). +* Added support for adding and subtracting arrays: `[5,2] + [1,7]` (see the short example below). Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)). +* For clickhouse-local, allow positional options and populate global UDF settings (`user_scripts_path` and `user_defined_executable_functions_config`). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* `system.asynchronous_metrics` now includes the metrics "QueryCacheEntries" and "QueryCacheBytes" to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)). +* Added the possibility to use the `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)). +* Improve insert retries on keeper session expiration. [#52688](https://github.com/ClickHouse/ClickHouse/pull/52688) ([Raúl Marín](https://github.com/Algunenano)). +* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)). +* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently we have database and table names case-sensitive, but the tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have the `information_schema` database, containing lowercase tables, such as `information_schema.tables`, and the `INFORMATION_SCHEMA` database, containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools are querying `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in the lowercase and uppercase `information_schema` databases. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* `GET_PART` and `ATTACH_PART` are almost identical, so they now use the same executor pool. [#52716](https://github.com/ClickHouse/ClickHouse/pull/52716) ([Duc Canh Le](https://github.com/canhld94)). +* Query `CHECK TABLE` has better performance and usability (sends progress updates, cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)). +* Add `modulo`, `intDiv`, `intDivOrZero` for tuples. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* When merging into a non-'clickhouse'-rooted configuration, configs with a different root node name are now simply bypassed without an exception.
[#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)). +* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)). +* Use the same default paths for `clickhouse_keeper` (symlink) as for `clickhouse_keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). +* CVE-2016-2183: disable 3DES. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)). +* Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve error message for table function remote. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). +* Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). +* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Enable parallel reading from replicas over distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)). +* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)). +* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)). +* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)). 
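+
+The array addition/subtraction from [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) above, as a minimal sketch (element-wise on equally sized arrays; the alias names are arbitrary):
+
+```sql
+SELECT
+    [5, 2] + [1, 7] AS sum_arr,   -- [6, 9]
+    [5, 2] - [1, 7] AS diff_arr;  -- [4, -5]
+```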
+* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)). +* Add diagnostic info about the file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)). +* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to override credentials for accessing the base backup in S3 (since tokens may be expired). [#53326](https://github.com/ClickHouse/ClickHouse/pull/53326) ([Azat Khuzhin](https://github.com/azat)). +* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)). +* Limit the number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)). +* Added a setting `allow_moving_table_directory_to_trash` that allows to ignore the `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` configured to 0 now fail gracefully instead of crashing the server. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). +* Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times, which didn't make much sense. In order to avoid this the caller could have some logic to not add the same watch multiple times. With this change this deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). +* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). +* Add the ability to turn off the flush of Distributed tables on `DETACH`/`DROP`/server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). +* `domainRFC` supports IPv6 (an IP literal within square brackets).
[#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). +* Use filtering by file/path before reading in the url/file/hdfs table functions. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). +* Use a longer timeout for S3 CopyObject requests. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). +* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit the array size for the `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `SCHEMA()` was added as an alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). +* Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* As expert-level settings, it is now possible to 1. configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, 2. configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). +* More careful thread management will improve the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)). +* Upgrade snappy to 1.1.10; ClickHouse may benefit from it. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)). +* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Cache the number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by the setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)). +* Updated to retry loading a part in case of Azure::Core::Http::TransportException (https://github.com/ClickHouse/ClickHouse/issues/39700#issuecomment-1686442785). [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Stack traces for exceptions; materialized view exceptions are propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)). +* If no hostname or port were specified, keeper client will try to search for a connection string in ClickHouse's config.xml.
[#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)). +* Add profile event `PartsLockMicroseconds` which shows the number of microseconds we hold the data parts lock in the MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)). +* Make the reconnect limit in Raft limits configurable for Keeper. This configuration can help Keeper rebuild the connection with peers quicker if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)). +* Supported globs in SELECT from file in clickhouse-local (see the example below). [#53863](https://github.com/ClickHouse/ClickHouse/pull/53863) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Ignore foreign keys in table definitions to improve compatibility with MySQL, so a user wouldn't need to rewrite the foreign key part of their SQL, ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)). +* 'from' is supported as an Expression. [#53914](https://github.com/ClickHouse/ClickHouse/pull/53914) ([Chen768959](https://github.com/Chen768959)). +* Changes of the server configuration are now detected with high precision (milliseconds and less). [#54065](https://github.com/ClickHouse/ClickHouse/pull/54065) ([Mikhail Koviazin](https://github.com/mkmkme)). + +#### Build/Testing/Packaging Improvement +* Don't expose symbols from the ClickHouse binary to the dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed endianness issues in the native protocol. [#50267](https://github.com/ClickHouse/ClickHouse/pull/50267) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Build `clickhouse/nginx-dav` and use it in integration tests instead of `kssenii/nginx-test`. Addresses [#43182](https://github.com/ClickHouse/ClickHouse/issues/43182). [#51843](https://github.com/ClickHouse/ClickHouse/pull/51843) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed ForEach aggregate function state for s390x. [#52040](https://github.com/ClickHouse/ClickHouse/pull/52040) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Add https://github.com/elliotchance/sqltest to CI to report SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed a codec delta endianness issue for s390x. [#52592](https://github.com/ClickHouse/ClickHouse/pull/52592) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Packing the inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)). +* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
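+
+A quick sketch of the clickhouse-local glob support mentioned above (the path and format here are hypothetical):
+
+```sql
+-- Run inside clickhouse-local: aggregate every CSV file matching the glob without creating a table.
+SELECT count() AS total_rows
+FROM file('logs/*.csv', 'CSVWithNames');
+```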
+* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ClickHouse builds for Linux s390x to CI. [#53181](https://github.com/ClickHouse/ClickHouse/pull/53181) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `arrayAUC` was using heavy C++ templates. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some translation units were always rebuilt regardless of ccache. The culprit was found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud, the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Three tests were failing / flaky: 1. test_host_regexp_multiple_ptr_records 2. test_host_regexp_multiple_ptr_records_concurrent 3. test_reverse_dns_query. [#53286](https://github.com/ClickHouse/ClickHouse/pull/53286) ([Arthur Passos](https://github.com/arthurpassos)). +* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in performance test to ClickHouse Cloud. [#53355](https://github.com/ClickHouse/ClickHouse/pull/53355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper ENV parameter passing to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed a base64 endianness issue for s390x. [#53570](https://github.com/ClickHouse/ClickHouse/pull/53570) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`.
[#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)). +* Fixed functional test in 02354_distributed_with_external_aggregation_memory_usage in s390x. [#53648](https://github.com/ClickHouse/ClickHouse/pull/53648) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Skipped QPL functional test for s390x. [#53758](https://github.com/ClickHouse/ClickHouse/pull/53758) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Slightly improve cmake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)). +* Fixed StripeLog storage endian issue on the s390x platform. [#53902](https://github.com/ClickHouse/ClickHouse/pull/53902) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Do not reset Annoy index during build-up with > 1 mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). +* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Bug fix for checksum of compress marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix mistakenly comma parsing as part of datetime in CSV best effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't throw exception when exec udf has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). +* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). +* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). +* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* RFC: Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). 
+* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). +* Fix possible assert in ~PushingAsyncPipelineExecutor in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). +* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Convert sparse to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). +* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). +* #2 Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* #3 Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). 
+* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Forbid use_structure_from_insertion_table_in_table_functions when execute Scalar [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix 'Context has expired' error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix timeout_overflow_mode when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). +* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Fix JSON_QUERY Function parse error while path is all number [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). +* bugfix: Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). +* Fix crash in join on sparse column [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). +* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix number of dropped granules in EXPLAIN PLAN index=1 [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Prepared set cache in mutation pipeline stuck [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). +* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). +* Try to fix bug with NULL::LowCardinality(Nullable(...)) NOT IN [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* transform: correctly handle default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). +* Materialized postgres: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix rows_before_limit_at_least for DelayedSource. [#54122](https://github.com/ClickHouse/ClickHouse/pull/54122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Implementing new commands for keeper-client"'. [#52985](https://github.com/ClickHouse/ClickHouse/pull/52985) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove try/catch from DatabaseFilesystem"'. [#53044](https://github.com/ClickHouse/ClickHouse/pull/53044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Upload build time-trace data to CI database"'. [#53210](https://github.com/ClickHouse/ClickHouse/pull/53210) ([Alexander Gololobov](https://github.com/davenger)). +* NO CL ENTRY: 'Revert "Added new tests for session_log and fixed the inconsistency between login and logout."'. [#53247](https://github.com/ClickHouse/ClickHouse/pull/53247) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Improve CHECK TABLE system query"'. [#53272](https://github.com/ClickHouse/ClickHouse/pull/53272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "#2 Added new tests for session_log and fixed the inconsistency between login and logout."'. [#53294](https://github.com/ClickHouse/ClickHouse/pull/53294) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Documentation: add Ibis project to the integrations section"'. [#53374](https://github.com/ClickHouse/ClickHouse/pull/53374) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Planner prepare filters for analysis"'. [#53782](https://github.com/ClickHouse/ClickHouse/pull/53782) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "dateDiff: add support for plural units."'. [#53795](https://github.com/ClickHouse/ClickHouse/pull/53795) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* NO CL ENTRY: 'Revert "Fixed wrong python test name pattern"'. [#53929](https://github.com/ClickHouse/ClickHouse/pull/53929) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries."'. [#54063](https://github.com/ClickHouse/ClickHouse/pull/54063) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* clickhouse-copier add check drop partition [#35263](https://github.com/ClickHouse/ClickHouse/pull/35263) ([sunny](https://github.com/sunny19930321)). +* Add more checks into ThreadStatus ctor. [#42019](https://github.com/ClickHouse/ClickHouse/pull/42019) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Refactor Query Tree visitor [#46740](https://github.com/ClickHouse/ClickHouse/pull/46740) ([Dmitry Novik](https://github.com/novikd)). +* Revert "Revert "Randomize JIT settings in tests"" [#48282](https://github.com/ClickHouse/ClickHouse/pull/48282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau… [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix IN with decimal in analyzer [#48754](https://github.com/ClickHouse/ClickHouse/pull/48754) ([vdimir](https://github.com/vdimir)). +* Some unclear change in StorageBuffer::reschedule() for something [#49723](https://github.com/ClickHouse/ClickHouse/pull/49723) ([DimasKovas](https://github.com/DimasKovas)). +* MergeTree & SipHash checksum big-endian support [#50276](https://github.com/ClickHouse/ClickHouse/pull/50276) ([ltrk2](https://github.com/ltrk2)). +* Maintain same aggregate function merge behavior for small and big endian machine [#50609](https://github.com/ClickHouse/ClickHouse/pull/50609) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Add a test to limit client max opening fd [#51213](https://github.com/ClickHouse/ClickHouse/pull/51213) ([Duc Canh Le](https://github.com/canhld94)). +* Add info about acquired space in cache to not enough space error [#51537](https://github.com/ClickHouse/ClickHouse/pull/51537) ([vdimir](https://github.com/vdimir)). +* KeeperDispatcher: remove reductant lock as the ConcurrentBoundedQueue is thread-safe [#51766](https://github.com/ClickHouse/ClickHouse/pull/51766) ([frinkr](https://github.com/frinkr)). +* Fix build type in packager [#51771](https://github.com/ClickHouse/ClickHouse/pull/51771) ([Antonio Andelic](https://github.com/antonio2368)). +* metrics_perf_events_enabled turn off in perf tests [#52072](https://github.com/ClickHouse/ClickHouse/pull/52072) ([Sema Checherinda](https://github.com/CheSema)). +* Remove try/catch from DatabaseFilesystem [#52155](https://github.com/ClickHouse/ClickHouse/pull/52155) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test that clickhouse-client or local do not throw/catch on startup [#52159](https://github.com/ClickHouse/ClickHouse/pull/52159) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Retry blob listing in test_alter_moving_garbage [#52193](https://github.com/ClickHouse/ClickHouse/pull/52193) ([vdimir](https://github.com/vdimir)). +* Try to make `test_kafka_formats_with_broken_message` and `test_kafka_formats` integration tests stable [#52273](https://github.com/ClickHouse/ClickHouse/pull/52273) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Kill the runner process with all subprocesses [#52277](https://github.com/ClickHouse/ClickHouse/pull/52277) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Implement endianness-independent support for MergeTree checksums [#52329](https://github.com/ClickHouse/ClickHouse/pull/52329) ([ltrk2](https://github.com/ltrk2)). +* add tests with connection reset by peer error, and retry it inside client [#52441](https://github.com/ClickHouse/ClickHouse/pull/52441) ([Sema Checherinda](https://github.com/CheSema)). +* Fix logging for asynchronous non-batched distributed sends [#52583](https://github.com/ClickHouse/ClickHouse/pull/52583) ([Azat Khuzhin](https://github.com/azat)). +* Follow-up to "Implement support of encrypted elements in configuration file" [#52609](https://github.com/ClickHouse/ClickHouse/pull/52609) ([Robert Schulze](https://github.com/rschu1ze)). +* Return zxid from TestKeeper and in multi responses [#52618](https://github.com/ClickHouse/ClickHouse/pull/52618) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer: Support ARRAY JOIN COLUMNS(...) syntax [#52622](https://github.com/ClickHouse/ClickHouse/pull/52622) ([Dmitry Novik](https://github.com/novikd)). +* Fix stress test: check if storage shutdown before we operate MergeTreeDeduplicationLog [#52623](https://github.com/ClickHouse/ClickHouse/pull/52623) ([Han Fei](https://github.com/hanfei1991)). +* Suspicious DISTINCT crashes from sqlancer [#52636](https://github.com/ClickHouse/ClickHouse/pull/52636) ([Igor Nikonov](https://github.com/devcrafter)). +* Partially fixed test 01747_system_session_log_long [#52640](https://github.com/ClickHouse/ClickHouse/pull/52640) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Check for unexpected Cyrillic [#52641](https://github.com/ClickHouse/ClickHouse/pull/52641) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_keeper_reconfig_replace_leader` [#52651](https://github.com/ClickHouse/ClickHouse/pull/52651) ([Antonio Andelic](https://github.com/antonio2368)). +* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove creation of an unnecessary temporary ContextAccess on login [#52660](https://github.com/ClickHouse/ClickHouse/pull/52660) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update version after release [#52661](https://github.com/ClickHouse/ClickHouse/pull/52661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.7.1.2470-stable [#52664](https://github.com/ClickHouse/ClickHouse/pull/52664) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove unneeded readBinary() specializations + update docs [#52683](https://github.com/ClickHouse/ClickHouse/pull/52683) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove remainders of legacy setting 'allow_experimental_query_cache' [#52685](https://github.com/ClickHouse/ClickHouse/pull/52685) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02417_opentelemetry_insert_on_distributed_table flakiness [#52691](https://github.com/ClickHouse/ClickHouse/pull/52691) ([Azat Khuzhin](https://github.com/azat)).
+* Improvements to backup restore disallow_concurrency test [#52709](https://github.com/ClickHouse/ClickHouse/pull/52709) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Move UnlinkMetadataFileOperationOutcome to common header [#52710](https://github.com/ClickHouse/ClickHouse/pull/52710) ([Alexander Gololobov](https://github.com/davenger)). +* Improve endianness-independent support for hash functions [#52712](https://github.com/ClickHouse/ClickHouse/pull/52712) ([ltrk2](https://github.com/ltrk2)). +* Allow reading zero objects in CachedObjectStorage::readObjects() [#52733](https://github.com/ClickHouse/ClickHouse/pull/52733) ([Michael Kolupaev](https://github.com/al13n321)). +* Merging reading from archives [#50321](https://github.com/ClickHouse/ClickHouse/issues/50321) [#52734](https://github.com/ClickHouse/ClickHouse/pull/52734) ([Antonio Andelic](https://github.com/antonio2368)). +* Merging [#52640](https://github.com/ClickHouse/ClickHouse/issues/52640) [#52744](https://github.com/ClickHouse/ClickHouse/pull/52744) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: fix 00979_set_index_not.sql [#52754](https://github.com/ClickHouse/ClickHouse/pull/52754) ([Igor Nikonov](https://github.com/devcrafter)). +* Planner prepare filters for analysis [#52762](https://github.com/ClickHouse/ClickHouse/pull/52762) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow reading empty file with no blobs [#52763](https://github.com/ClickHouse/ClickHouse/pull/52763) ([Alexander Gololobov](https://github.com/davenger)). +* Fix: check correctly window frame bounds for RANGE [#52768](https://github.com/ClickHouse/ClickHouse/pull/52768) ([Igor Nikonov](https://github.com/devcrafter)). +* Numerical stability of the test for Polygons [#52769](https://github.com/ClickHouse/ClickHouse/pull/52769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change the default timezones in Docker test images [#52772](https://github.com/ClickHouse/ClickHouse/pull/52772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Upload build statistics to the CI database [#52773](https://github.com/ClickHouse/ClickHouse/pull/52773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `instance_type` information to the CI database [#52774](https://github.com/ClickHouse/ClickHouse/pull/52774) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove Coverity (part 2) [#52775](https://github.com/ClickHouse/ClickHouse/pull/52775) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a tool to upload `-ftime-trace` to ClickHouse [#52776](https://github.com/ClickHouse/ClickHouse/pull/52776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert revert of system drop filesystem cache by key [#52778](https://github.com/ClickHouse/ClickHouse/pull/52778) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove obsolete part of a check name [#52793](https://github.com/ClickHouse/ClickHouse/pull/52793) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix TLS tests [#52796](https://github.com/ClickHouse/ClickHouse/pull/52796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow OOM in Stress and Upgrade checks [#52807](https://github.com/ClickHouse/ClickHouse/pull/52807) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not test upper bounds for throttlers [#52821](https://github.com/ClickHouse/ClickHouse/pull/52821) ([Sergei Trifonov](https://github.com/serxa)). 
+* Add more logging and touch test for materialize mysql [#52822](https://github.com/ClickHouse/ClickHouse/pull/52822) ([alesapin](https://github.com/alesapin)). +* Try to remove more leftovers. [#52823](https://github.com/ClickHouse/ClickHouse/pull/52823) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update test_crash_log/test.py [#52825](https://github.com/ClickHouse/ClickHouse/pull/52825) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Don't report LOGICAL_ERROR if a file got truncated during read [#52828](https://github.com/ClickHouse/ClickHouse/pull/52828) ([Michael Kolupaev](https://github.com/al13n321)). +* Throw S3Exception whenever possible. [#52829](https://github.com/ClickHouse/ClickHouse/pull/52829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Cleanup localBackup [#52837](https://github.com/ClickHouse/ClickHouse/pull/52837) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Try to fix 02352_rwlock [#52852](https://github.com/ClickHouse/ClickHouse/pull/52852) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Disable a couple of long tests for debug build. [#52854](https://github.com/ClickHouse/ClickHouse/pull/52854) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix flaky tests in test_merge_tree_azure_blob_storage & test_storage_azure_blob_storage [#52855](https://github.com/ClickHouse/ClickHouse/pull/52855) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Cancel merges before renaming a system log table [#52858](https://github.com/ClickHouse/ClickHouse/pull/52858) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try to fix a rare fail in 00612_http_max_query_size [#52859](https://github.com/ClickHouse/ClickHouse/pull/52859) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove duplicated dialect setting value [#52864](https://github.com/ClickHouse/ClickHouse/pull/52864) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Significant improvement of rust caching [#52865](https://github.com/ClickHouse/ClickHouse/pull/52865) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try to continue clickhouse process in stress test after terminating gdb. [#52871](https://github.com/ClickHouse/ClickHouse/pull/52871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* fix master ci for [#52091](https://github.com/ClickHouse/ClickHouse/issues/52091) [#52873](https://github.com/ClickHouse/ClickHouse/pull/52873) ([Han Fei](https://github.com/hanfei1991)). +* Fix the PR body check for `Reverts #number` [#52874](https://github.com/ClickHouse/ClickHouse/pull/52874) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer WITH statement references test [#52875](https://github.com/ClickHouse/ClickHouse/pull/52875) ([Maksim Kita](https://github.com/kitaisreal)). +* Disable more tests for debug. [#52878](https://github.com/ClickHouse/ClickHouse/pull/52878) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix assertion in mutations with transactions [#52894](https://github.com/ClickHouse/ClickHouse/pull/52894) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fixed test_profile_max_sessions_for_user test flakiness [#52897](https://github.com/ClickHouse/ClickHouse/pull/52897) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Use concepts to replace more std::enable_if_t [#52898](https://github.com/ClickHouse/ClickHouse/pull/52898) ([flynn](https://github.com/ucasfl)). +* Disable `test_reconfig_replace_leader_in_one_command` [#52901](https://github.com/ClickHouse/ClickHouse/pull/52901) ([Antonio Andelic](https://github.com/antonio2368)). +* tests: fix possible EADDRINUSE v2 [#52906](https://github.com/ClickHouse/ClickHouse/pull/52906) ([Azat Khuzhin](https://github.com/azat)). +* Merging [#52897](https://github.com/ClickHouse/ClickHouse/issues/52897) [#52907](https://github.com/ClickHouse/ClickHouse/pull/52907) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove obsolete `no-upgrade-check` tag [#52915](https://github.com/ClickHouse/ClickHouse/pull/52915) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test_storage_s3_queue::test_multiple_tables_streaming_sync_distributed [#52944](https://github.com/ClickHouse/ClickHouse/pull/52944) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't create empty parts on drop partittion if we have a transaction [#52945](https://github.com/ClickHouse/ClickHouse/pull/52945) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: fix WITH clause resolving [#52947](https://github.com/ClickHouse/ClickHouse/pull/52947) ([Dmitry Novik](https://github.com/novikd)). +* Refactor CI_CONFIG [#52948](https://github.com/ClickHouse/ClickHouse/pull/52948) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try to fix assert in remove redundant sorting [#52950](https://github.com/ClickHouse/ClickHouse/pull/52950) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove unused code in StorageSystemStackTrace [#52952](https://github.com/ClickHouse/ClickHouse/pull/52952) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong error code "BAD_GET" [#52954](https://github.com/ClickHouse/ClickHouse/pull/52954) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix some issues with databases [#52956](https://github.com/ClickHouse/ClickHouse/pull/52956) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix config update in HTTP Header Filtering [#52957](https://github.com/ClickHouse/ClickHouse/pull/52957) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added peak_memory_usage to clickhouse-client final progress message [#52961](https://github.com/ClickHouse/ClickHouse/pull/52961) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* tests: fix 01293_client_interactive_vertical_multiline flakiness (increase timeout) [#52965](https://github.com/ClickHouse/ClickHouse/pull/52965) ([Azat Khuzhin](https://github.com/azat)). +* Added TSAN option report_atomic_races=0 for test_max_sessions_for_user [#52969](https://github.com/ClickHouse/ClickHouse/pull/52969) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* MaterializedMySQL: Add tests for unquoted utf8 column names in DML [#52971](https://github.com/ClickHouse/ClickHouse/pull/52971) ([Val Doroshchuk](https://github.com/valbok)). +* Update version_date.tsv and changelogs after v23.7.2.25-stable [#52976](https://github.com/ClickHouse/ClickHouse/pull/52976) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Decrease a num of tries for a couple of too slow tests for debug. 
[#52981](https://github.com/ClickHouse/ClickHouse/pull/52981) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix test `00061_storage_buffer` [#52983](https://github.com/ClickHouse/ClickHouse/pull/52983) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `test_host_regexp_multiple_ptr_records_concurrent`, CC @arthurpassos [#52984](https://github.com/ClickHouse/ClickHouse/pull/52984) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_zookeeper_config` [#52988](https://github.com/ClickHouse/ClickHouse/pull/52988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove assertion from test_no_ttl_merges_in_busy_pool [#52989](https://github.com/ClickHouse/ClickHouse/pull/52989) ([alesapin](https://github.com/alesapin)). +* Fix `test_dictionary_custom_settings` [#52990](https://github.com/ClickHouse/ClickHouse/pull/52990) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test [#53007](https://github.com/ClickHouse/ClickHouse/pull/53007) ([alesapin](https://github.com/alesapin)). +* Fix default port for Keeper Client [#53010](https://github.com/ClickHouse/ClickHouse/pull/53010) ([pufit](https://github.com/pufit)). +* Add a test to broken tests (Analyzer) [#53013](https://github.com/ClickHouse/ClickHouse/pull/53013) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implement big-endian support for transform [#53015](https://github.com/ClickHouse/ClickHouse/pull/53015) ([ltrk2](https://github.com/ltrk2)). +* Fix completion for clickhouse-keeper-client [#53029](https://github.com/ClickHouse/ClickHouse/pull/53029) ([Azat Khuzhin](https://github.com/azat)). +* clickhouse-keeper-client: fix version parsing for set command [#53031](https://github.com/ClickHouse/ClickHouse/pull/53031) ([Azat Khuzhin](https://github.com/azat)). +* MaterializedMySQL: Add tests to alter named collections [#53032](https://github.com/ClickHouse/ClickHouse/pull/53032) ([Val Doroshchuk](https://github.com/valbok)). +* Fix description for 's3_upload_part_size_multiply_parts_count_threshold' setting [#53042](https://github.com/ClickHouse/ClickHouse/pull/53042) ([Elena Torró](https://github.com/elenatorro)). +* Update 01114_database_atomic.sh [#53043](https://github.com/ClickHouse/ClickHouse/pull/53043) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert revert of "Remove try/catch from DatabaseFilesystem" [#53045](https://github.com/ClickHouse/ClickHouse/pull/53045) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix cache related logical error in stress tests [#53047](https://github.com/ClickHouse/ClickHouse/pull/53047) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove upgrade checks with sanitizers [#53051](https://github.com/ClickHouse/ClickHouse/pull/53051) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Increase election timeout in integration tests [#53052](https://github.com/ClickHouse/ClickHouse/pull/53052) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: do not enable it for old servers in tests [#53053](https://github.com/ClickHouse/ClickHouse/pull/53053) ([Dmitry Novik](https://github.com/novikd)). +* Try to make `01414_mutations_and_errors_zookeeper` less flaky [#53056](https://github.com/ClickHouse/ClickHouse/pull/53056) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix test `02434_cancel_insert_when_client_dies` [#53062](https://github.com/ClickHouse/ClickHouse/pull/53062) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add `abort_on_error=1` to `TSAN_OPTIONS` [#53065](https://github.com/ClickHouse/ClickHouse/pull/53065) ([Nikita Taranov](https://github.com/nickitat)). +* Fix Parquet stats for Float32 and Float64 [#53067](https://github.com/ClickHouse/ClickHouse/pull/53067) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix a comment [#53072](https://github.com/ClickHouse/ClickHouse/pull/53072) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02263_format_insert_settings flakiness [#53080](https://github.com/ClickHouse/ClickHouse/pull/53080) ([Azat Khuzhin](https://github.com/azat)). +* Something with tests [#53081](https://github.com/ClickHouse/ClickHouse/pull/53081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.7.3.14-stable [#53084](https://github.com/ClickHouse/ClickHouse/pull/53084) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Simplify system logs creation [#53085](https://github.com/ClickHouse/ClickHouse/pull/53085) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix hung check in stress test [#53090](https://github.com/ClickHouse/ClickHouse/pull/53090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add clusters for running tests locally easily [#53091](https://github.com/ClickHouse/ClickHouse/pull/53091) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wording [#53092](https://github.com/ClickHouse/ClickHouse/pull/53092) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update README.md [#53097](https://github.com/ClickHouse/ClickHouse/pull/53097) ([Tyler Hannan](https://github.com/tylerhannan)). +* Remove old util [#53099](https://github.com/ClickHouse/ClickHouse/pull/53099) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add optional parameters to Buffer Engine definition [#53102](https://github.com/ClickHouse/ClickHouse/pull/53102) ([Elena Torró](https://github.com/elenatorro)). +* Compatibility with clang-17 [#53104](https://github.com/ClickHouse/ClickHouse/pull/53104) ([Raúl Marín](https://github.com/Algunenano)). +* Remove duplicate test: `test_concurrent_alter_with_ttl_move` [#53107](https://github.com/ClickHouse/ClickHouse/pull/53107) ([alesapin](https://github.com/alesapin)). +* Relax flaky test `test_s3_engine_heavy_write_check_mem` [#53108](https://github.com/ClickHouse/ClickHouse/pull/53108) ([alesapin](https://github.com/alesapin)). +* Update PocoHTTPClient.cpp [#53109](https://github.com/ClickHouse/ClickHouse/pull/53109) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add hints for HTTP handlers [#53110](https://github.com/ClickHouse/ClickHouse/pull/53110) ([Ruslan Mardugalliamov](https://github.com/rmarduga)). +* Revert changes in `ZstdDeflatingAppendableWriteBuffer` [#53111](https://github.com/ClickHouse/ClickHouse/pull/53111) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test by using azure_query function [#53113](https://github.com/ClickHouse/ClickHouse/pull/53113) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update `test_restore_replica` [#53119](https://github.com/ClickHouse/ClickHouse/pull/53119) ([Alexander Tokmakov](https://github.com/tavplubix)). +* do not fail if prctl is not allowed ([#43589](https://github.com/ClickHouse/ClickHouse/issues/43589)) [#53122](https://github.com/ClickHouse/ClickHouse/pull/53122) ([ekrasikov](https://github.com/ekrasikov)). 
+* Use more unique name for TemporaryFileOnDisk [#53123](https://github.com/ClickHouse/ClickHouse/pull/53123) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update `Mergeable Check` at the finishing CI [#53126](https://github.com/ClickHouse/ClickHouse/pull/53126) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Added retry for TransportException in azure blob storage [#53128](https://github.com/ClickHouse/ClickHouse/pull/53128) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Small fix for HTTPHeaderFilter [#53146](https://github.com/ClickHouse/ClickHouse/pull/53146) ([San](https://github.com/santrancisco)). +* Added functions to disallow concurrency of backup restore test [#53150](https://github.com/ClickHouse/ClickHouse/pull/53150) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Attempt to fix test_insert_quorum by adding sync second replica [#53155](https://github.com/ClickHouse/ClickHouse/pull/53155) ([vdimir](https://github.com/vdimir)). +* fix mem leak in RegExpTreeDictionary [#53160](https://github.com/ClickHouse/ClickHouse/pull/53160) ([Han Fei](https://github.com/hanfei1991)). +* Fixes for detach/attach partition and broken detached parts cleanup [#53164](https://github.com/ClickHouse/ClickHouse/pull/53164) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update conftest.py [#53166](https://github.com/ClickHouse/ClickHouse/pull/53166) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow experimantal features when recovering Replicated db replica [#53167](https://github.com/ClickHouse/ClickHouse/pull/53167) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv and changelogs after v23.7.4.5-stable [#53169](https://github.com/ClickHouse/ClickHouse/pull/53169) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: fix test_system_flush_logs [#53171](https://github.com/ClickHouse/ClickHouse/pull/53171) ([Dmitry Novik](https://github.com/novikd)). +* Fix warning in test_replicated_database [#53173](https://github.com/ClickHouse/ClickHouse/pull/53173) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix: 00838_unique_index test with analyzer [#53175](https://github.com/ClickHouse/ClickHouse/pull/53175) ([Igor Nikonov](https://github.com/devcrafter)). +* Improved efficiency for array operations [#53193](https://github.com/ClickHouse/ClickHouse/pull/53193) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve reading from archives [#53198](https://github.com/ClickHouse/ClickHouse/pull/53198) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Enable hedged requests under tsan [#53219](https://github.com/ClickHouse/ClickHouse/pull/53219) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove garbage [#53241](https://github.com/ClickHouse/ClickHouse/pull/53241) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix LOGICAL_ERROR exception in ALTER query [#53242](https://github.com/ClickHouse/ClickHouse/pull/53242) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix bad test `00417_kill_query` [#53244](https://github.com/ClickHouse/ClickHouse/pull/53244) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix test `02428_delete_with_settings` [#53246](https://github.com/ClickHouse/ClickHouse/pull/53246) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unrecognizable garbage from the performance test [#53249](https://github.com/ClickHouse/ClickHouse/pull/53249) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable randomization in `02273_full_sort_join` [#53251](https://github.com/ClickHouse/ClickHouse/pull/53251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove outdated Dockerfile [#53252](https://github.com/ClickHouse/ClickHouse/pull/53252) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve fs cache cleanup [#53273](https://github.com/ClickHouse/ClickHouse/pull/53273) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add garbage [#53279](https://github.com/ClickHouse/ClickHouse/pull/53279) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Inhibit randomization in `00906_low_cardinality_cache` [#53283](https://github.com/ClickHouse/ClickHouse/pull/53283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 01169_old_alter_partition_isolation_stress [#53292](https://github.com/ClickHouse/ClickHouse/pull/53292) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove no-parallel tag from some tests [#53295](https://github.com/ClickHouse/ClickHouse/pull/53295) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix test `00002_log_and_exception_messages_formatting` [#53296](https://github.com/ClickHouse/ClickHouse/pull/53296) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `01485_256_bit_multiply` [#53297](https://github.com/ClickHouse/ClickHouse/pull/53297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove flaky tests for the experimental `UNDROP` feature [#53298](https://github.com/ClickHouse/ClickHouse/pull/53298) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added test for session_log using remote and mysql sessions [#53304](https://github.com/ClickHouse/ClickHouse/pull/53304) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added integration test for session_log using concurrrent GRPC/PostgreSQL/MySQL sessions [#53305](https://github.com/ClickHouse/ClickHouse/pull/53305) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added test for session_log using concurrrent TCP/HTTP/MySQL sessions [#53306](https://github.com/ClickHouse/ClickHouse/pull/53306) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added test for session_log dropping user/role/profile currently used in active session [#53307](https://github.com/ClickHouse/ClickHouse/pull/53307) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added an integration test for client peak_memory_usage value [#53308](https://github.com/ClickHouse/ClickHouse/pull/53308) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix log message [#53339](https://github.com/ClickHouse/ClickHouse/pull/53339) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Analyzer: fix quotas for system tables [#53343](https://github.com/ClickHouse/ClickHouse/pull/53343) ([Dmitry Novik](https://github.com/novikd)). +* Relax mergeable check [#53344](https://github.com/ClickHouse/ClickHouse/pull/53344) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add clickhouse-keeper-client and clickhouse-keeper-converter symlinks to clickhouse-keeper package [#53357](https://github.com/ClickHouse/ClickHouse/pull/53357) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Add linux s390x to universal installer [#53358](https://github.com/ClickHouse/ClickHouse/pull/53358) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Make one exception message longer [#53375](https://github.com/ClickHouse/ClickHouse/pull/53375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong query in log messages check [#53376](https://github.com/ClickHouse/ClickHouse/pull/53376) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Non-significant changes [#53377](https://github.com/ClickHouse/ClickHouse/pull/53377) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Inhibit randomization in more tests [#53378](https://github.com/ClickHouse/ClickHouse/pull/53378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make some Keeper exceptions more structured [#53379](https://github.com/ClickHouse/ClickHouse/pull/53379) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow-up to [#52695](https://github.com/ClickHouse/ClickHouse/issues/52695): Move tests to a more appropriate place [#53400](https://github.com/ClickHouse/ClickHouse/pull/53400) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor fixes (hints for wrong DB or table name) [#53402](https://github.com/ClickHouse/ClickHouse/pull/53402) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Quick fail undocumented features [#53413](https://github.com/ClickHouse/ClickHouse/pull/53413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* boost getNestedColumnWithDefaultOnNull by insertManyDefaults [#53414](https://github.com/ClickHouse/ClickHouse/pull/53414) ([frinkr](https://github.com/frinkr)). +* Update test_distributed_inter_server_secret to pass with analyzer [#53416](https://github.com/ClickHouse/ClickHouse/pull/53416) ([vdimir](https://github.com/vdimir)). +* Parallel replicas: remove unnecessary code [#53419](https://github.com/ClickHouse/ClickHouse/pull/53419) ([Igor Nikonov](https://github.com/devcrafter)). +* Refactorings for configuration of in-memory caches [#53422](https://github.com/ClickHouse/ClickHouse/pull/53422) ([Robert Schulze](https://github.com/rschu1ze)). +* Less exceptions with runtime format string [#53424](https://github.com/ClickHouse/ClickHouse/pull/53424) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: fix virtual columns in StorageDistributed [#53426](https://github.com/ClickHouse/ClickHouse/pull/53426) ([Dmitry Novik](https://github.com/novikd)). +* Fix creation of empty parts [#53429](https://github.com/ClickHouse/ClickHouse/pull/53429) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53177](https://github.com/ClickHouse/ClickHouse/issues/53177) [#53430](https://github.com/ClickHouse/ClickHouse/pull/53430) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53142](https://github.com/ClickHouse/ClickHouse/issues/53142) [#53431](https://github.com/ClickHouse/ClickHouse/pull/53431) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not send logs to CI if the credentials are not set [#53441](https://github.com/ClickHouse/ClickHouse/pull/53441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Minor: Factorize constants in Annoy index [#53444](https://github.com/ClickHouse/ClickHouse/pull/53444) ([Robert Schulze](https://github.com/rschu1ze)). +* Restart killed PublishedReleaseCI workflows [#53445](https://github.com/ClickHouse/ClickHouse/pull/53445) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow-up: Do not send logs to CI if the credentials are not set [#53456](https://github.com/ClickHouse/ClickHouse/pull/53456) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Merging [#53307](https://github.com/ClickHouse/ClickHouse/issues/53307) [#53472](https://github.com/ClickHouse/ClickHouse/pull/53472) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53306](https://github.com/ClickHouse/ClickHouse/issues/53306) [#53473](https://github.com/ClickHouse/ClickHouse/pull/53473) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53304](https://github.com/ClickHouse/ClickHouse/issues/53304) [#53474](https://github.com/ClickHouse/ClickHouse/pull/53474) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53373](https://github.com/ClickHouse/ClickHouse/issues/53373) [#53475](https://github.com/ClickHouse/ClickHouse/pull/53475) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test `02443_detach_attach_partition` [#53478](https://github.com/ClickHouse/ClickHouse/pull/53478) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove outdated code in ReplicatedMergeTreeQueue::initialize() [#53484](https://github.com/ClickHouse/ClickHouse/pull/53484) ([Azat Khuzhin](https://github.com/azat)). +* krb5: Fix CVE-2023-36054 [#53485](https://github.com/ClickHouse/ClickHouse/pull/53485) ([Robert Schulze](https://github.com/rschu1ze)). +* curl: update to latest master (fixes CVE-2023-32001) [#53487](https://github.com/ClickHouse/ClickHouse/pull/53487) ([Robert Schulze](https://github.com/rschu1ze)). +* Update boost to 1.79 [#53490](https://github.com/ClickHouse/ClickHouse/pull/53490) ([Robert Schulze](https://github.com/rschu1ze)). +* Get rid of secrets CLICKHOUSE_CI_LOGS [#53491](https://github.com/ClickHouse/ClickHouse/pull/53491) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update style checker [#53493](https://github.com/ClickHouse/ClickHouse/pull/53493) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update materialized_with_ddl.py [#53494](https://github.com/ClickHouse/ClickHouse/pull/53494) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix a race condition between RESTART REPLICAS and DROP DATABASE [#53495](https://github.com/ClickHouse/ClickHouse/pull/53495) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix tiny thing in Replicated database [#53496](https://github.com/ClickHouse/ClickHouse/pull/53496) ([Nikolay Degterinsky](https://github.com/evillique)). +* Simplify performance test [#53499](https://github.com/ClickHouse/ClickHouse/pull/53499) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added waiting for PostgreSQL compatibility port open in integrational tests. [#53505](https://github.com/ClickHouse/ClickHouse/pull/53505) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Allow non standalone keeper run in integration tests [#53512](https://github.com/ClickHouse/ClickHouse/pull/53512) ([Duc Canh Le](https://github.com/canhld94)). +* Make sending logs to the cloud less fragile (and fix an unrelated flaky test) [#53528](https://github.com/ClickHouse/ClickHouse/pull/53528) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Update test.py [#53534](https://github.com/ClickHouse/ClickHouse/pull/53534) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `AddressSanitizer failed to allocate 0x0 (0) bytes of SetAlternateSignalStack` in integration tests [#53535](https://github.com/ClickHouse/ClickHouse/pull/53535) ([Nikita Taranov](https://github.com/nickitat)). +* Fix keeper default path check [#53539](https://github.com/ClickHouse/ClickHouse/pull/53539) ([pufit](https://github.com/pufit)). +* Follow-up to [#53528](https://github.com/ClickHouse/ClickHouse/issues/53528) [#53544](https://github.com/ClickHouse/ClickHouse/pull/53544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update 00002_log_and_exception_messages_formatting.sql [#53545](https://github.com/ClickHouse/ClickHouse/pull/53545) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update krb5 to 1.21.2 [#53552](https://github.com/ClickHouse/ClickHouse/pull/53552) ([Robert Schulze](https://github.com/rschu1ze)). +* Enable ISA-L on x86-64 only by default [#53553](https://github.com/ClickHouse/ClickHouse/pull/53553) ([ltrk2](https://github.com/ltrk2)). +* Change Big Endian-UUID to work the same as Little Endian-UUID [#53556](https://github.com/ClickHouse/ClickHouse/pull/53556) ([Austin Kothig](https://github.com/kothiga)). +* Bump openldap to LTS version (v2.5.16) [#53558](https://github.com/ClickHouse/ClickHouse/pull/53558) ([Robert Schulze](https://github.com/rschu1ze)). +* Update 02443_detach_attach_partition.sh [#53564](https://github.com/ClickHouse/ClickHouse/pull/53564) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Proper destruction of task in ShellCommandSource [#53573](https://github.com/ClickHouse/ClickHouse/pull/53573) ([Amos Bird](https://github.com/amosbird)). +* Fix for flaky test_ssl_cert_authentication [#53586](https://github.com/ClickHouse/ClickHouse/pull/53586) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* AARCH64 Neon memequal wide [#53588](https://github.com/ClickHouse/ClickHouse/pull/53588) ([Maksim Kita](https://github.com/kitaisreal)). +* Experiment Aggregator merge and destroy states in batch [#53589](https://github.com/ClickHouse/ClickHouse/pull/53589) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix test `02102_row_binary_with_names_and_types` [#53592](https://github.com/ClickHouse/ClickHouse/pull/53592) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless test [#53599](https://github.com/ClickHouse/ClickHouse/pull/53599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simplify test `01600_parts_types_metrics_long` [#53606](https://github.com/ClickHouse/ClickHouse/pull/53606) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* :lipstick: [S3::URI] Fix comment typos around versionId [#53607](https://github.com/ClickHouse/ClickHouse/pull/53607) ([Tomáš Hromada](https://github.com/gyfis)). +* Fix upgrade check [#53611](https://github.com/ClickHouse/ClickHouse/pull/53611) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Cleanup cluster test: remove unnecessary zookeeper [#53617](https://github.com/ClickHouse/ClickHouse/pull/53617) ([Igor Nikonov](https://github.com/devcrafter)). +* Bump boost to 1.80 [#53625](https://github.com/ClickHouse/ClickHouse/pull/53625) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v23.3.9.55-lts [#53626](https://github.com/ClickHouse/ClickHouse/pull/53626) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* CMake small refactoring [#53628](https://github.com/ClickHouse/ClickHouse/pull/53628) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race of shell command [#53631](https://github.com/ClickHouse/ClickHouse/pull/53631) ([Amos Bird](https://github.com/amosbird)). +* Fix 02443_detach_attach_partition [#53633](https://github.com/ClickHouse/ClickHouse/pull/53633) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add default timeout value for ClickHouseHelper [#53639](https://github.com/ClickHouse/ClickHouse/pull/53639) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Implement support for more aggregate functions on big-endian [#53650](https://github.com/ClickHouse/ClickHouse/pull/53650) ([ltrk2](https://github.com/ltrk2)). +* fix Logical Error in AsynchronousBoundedReadBuffer [#53651](https://github.com/ClickHouse/ClickHouse/pull/53651) ([Sema Checherinda](https://github.com/CheSema)). +* State of State and avg aggregation function fix for big endian [#53655](https://github.com/ClickHouse/ClickHouse/pull/53655) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Resubmit [#50171](https://github.com/ClickHouse/ClickHouse/issues/50171) [#53678](https://github.com/ClickHouse/ClickHouse/pull/53678) ([alesapin](https://github.com/alesapin)). +* Bump boost to 1.81 [#53679](https://github.com/ClickHouse/ClickHouse/pull/53679) ([Robert Schulze](https://github.com/rschu1ze)). +* Whitespaces [#53690](https://github.com/ClickHouse/ClickHouse/pull/53690) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad test [#53691](https://github.com/ClickHouse/ClickHouse/pull/53691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad path format in logs [#53693](https://github.com/ClickHouse/ClickHouse/pull/53693) ([alesapin](https://github.com/alesapin)). +* Correct a functional test to not use endianness-specific input [#53697](https://github.com/ClickHouse/ClickHouse/pull/53697) ([ltrk2](https://github.com/ltrk2)). +* Fix running clickhouse-test with python 3.8 [#53700](https://github.com/ClickHouse/ClickHouse/pull/53700) ([Dmitry Novik](https://github.com/novikd)). +* refactor some old code [#53704](https://github.com/ClickHouse/ClickHouse/pull/53704) ([flynn](https://github.com/ucasfl)). +* Fixed wrong python test name pattern [#53713](https://github.com/ClickHouse/ClickHouse/pull/53713) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix flaky `shutdown_wait_unfinished_queries` integration test [#53714](https://github.com/ClickHouse/ClickHouse/pull/53714) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Update version_date.tsv and changelogs after v23.3.10.5-lts [#53733](https://github.com/ClickHouse/ClickHouse/pull/53733) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flaky test_storage_s3_queue/test.py::test_delete_after_processing [#53736](https://github.com/ClickHouse/ClickHouse/pull/53736) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not warn about arch_sys_counter clock [#53739](https://github.com/ClickHouse/ClickHouse/pull/53739) ([Artur Malchanau](https://github.com/Hexta)). +* Add some profile events [#53741](https://github.com/ClickHouse/ClickHouse/pull/53741) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Support clang-18 (Wmissing-field-initializers) [#53751](https://github.com/ClickHouse/ClickHouse/pull/53751) ([Raúl Marín](https://github.com/Algunenano)). +* Upgrade openSSL to v3.0.10 [#53756](https://github.com/ClickHouse/ClickHouse/pull/53756) ([bhavnajindal](https://github.com/bhavnajindal)). +* Improve JSON-handling on s390x [#53760](https://github.com/ClickHouse/ClickHouse/pull/53760) ([ltrk2](https://github.com/ltrk2)). +* Reduce API calls to SSM client [#53762](https://github.com/ClickHouse/ClickHouse/pull/53762) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove branch references from .gitmodules [#53763](https://github.com/ClickHouse/ClickHouse/pull/53763) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix reading from `url` with all filtered paths [#53796](https://github.com/ClickHouse/ClickHouse/pull/53796) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow-up to [#53611](https://github.com/ClickHouse/ClickHouse/issues/53611) [#53799](https://github.com/ClickHouse/ClickHouse/pull/53799) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix a bug in attach partition [#53811](https://github.com/ClickHouse/ClickHouse/pull/53811) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Bump boost to 1.82 [#53812](https://github.com/ClickHouse/ClickHouse/pull/53812) ([Robert Schulze](https://github.com/rschu1ze)). +* Enable producing endianness-independent output in lz4 [#53816](https://github.com/ClickHouse/ClickHouse/pull/53816) ([ltrk2](https://github.com/ltrk2)). +* Fix typo in cluster name. [#53829](https://github.com/ClickHouse/ClickHouse/pull/53829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update 00002_log_and_exception_messages_formatting.sql [#53839](https://github.com/ClickHouse/ClickHouse/pull/53839) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix after [#51622](https://github.com/ClickHouse/ClickHouse/issues/51622) [#53840](https://github.com/ClickHouse/ClickHouse/pull/53840) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault in `TableNameHints` (with `Lazy` database) [#53849](https://github.com/ClickHouse/ClickHouse/pull/53849) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow-up to [#53501](https://github.com/ClickHouse/ClickHouse/issues/53501) [#53851](https://github.com/ClickHouse/ClickHouse/pull/53851) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow-up to [#53528](https://github.com/ClickHouse/ClickHouse/issues/53528) [#53852](https://github.com/ClickHouse/ClickHouse/pull/53852) ([Alexander Tokmakov](https://github.com/tavplubix)). +* refactor some code [#53856](https://github.com/ClickHouse/ClickHouse/pull/53856) ([flynn](https://github.com/ucasfl)). +* Bump boost to 1.83 [#53859](https://github.com/ClickHouse/ClickHouse/pull/53859) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove unused parallel replicas coordinator in query info [#53862](https://github.com/ClickHouse/ClickHouse/pull/53862) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v23.7.5.30-stable [#53870](https://github.com/ClickHouse/ClickHouse/pull/53870) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Update version_date.tsv and changelogs after v23.6.3.87-stable [#53872](https://github.com/ClickHouse/ClickHouse/pull/53872) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.11.5-lts [#53873](https://github.com/ClickHouse/ClickHouse/pull/53873) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.5.5.92-stable [#53874](https://github.com/ClickHouse/ClickHouse/pull/53874) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.21.38-lts [#53875](https://github.com/ClickHouse/ClickHouse/pull/53875) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix: USearch deserialize [#53876](https://github.com/ClickHouse/ClickHouse/pull/53876) ([Davit Vardanyan](https://github.com/davvard)). +* Improve schema inference for archives [#53880](https://github.com/ClickHouse/ClickHouse/pull/53880) ([Antonio Andelic](https://github.com/antonio2368)). +* Make UInt128TrivialHash endianness-independent [#53891](https://github.com/ClickHouse/ClickHouse/pull/53891) ([ltrk2](https://github.com/ltrk2)). +* Use iterators instead of std::ranges [#53893](https://github.com/ClickHouse/ClickHouse/pull/53893) ([ltrk2](https://github.com/ltrk2)). +* Finalize file descriptor in ~WriteBufferToFileSegment [#53895](https://github.com/ClickHouse/ClickHouse/pull/53895) ([vdimir](https://github.com/vdimir)). +* Fix: respect skip_unavailable_shards with parallel replicas [#53904](https://github.com/ClickHouse/ClickHouse/pull/53904) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flakiness of 00514_interval_operators [#53906](https://github.com/ClickHouse/ClickHouse/pull/53906) ([Michael Kolupaev](https://github.com/al13n321)). +* Change IStorage interface by random walk, no goal in particular [#54009](https://github.com/ClickHouse/ClickHouse/pull/54009) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Refactor logic around async insert with deduplication [#54012](https://github.com/ClickHouse/ClickHouse/pull/54012) ([Antonio Andelic](https://github.com/antonio2368)). +* More assertive [#54044](https://github.com/ClickHouse/ClickHouse/pull/54044) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correct doc for filesystem_prefetch_max_memory_usage [#54058](https://github.com/ClickHouse/ClickHouse/pull/54058) ([Raúl Marín](https://github.com/Algunenano)). +* Fix after [#52943](https://github.com/ClickHouse/ClickHouse/issues/52943) [#54064](https://github.com/ClickHouse/ClickHouse/pull/54064) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Parse IS NOT DISTINCT and <=> operators [#54067](https://github.com/ClickHouse/ClickHouse/pull/54067) ([vdimir](https://github.com/vdimir)). +* Replace dlcdn.apache.org by archive domain [#54081](https://github.com/ClickHouse/ClickHouse/pull/54081) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Increased log waiting timeout in test_profile_max_sessions_for_user [#54092](https://github.com/ClickHouse/ClickHouse/pull/54092) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Update Dockerfile [#54118](https://github.com/ClickHouse/ClickHouse/pull/54118) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Small improvements in `getAlterMutationCommandsForPart` [#54126](https://github.com/ClickHouse/ClickHouse/pull/54126) ([Anton Popov](https://github.com/CurtizJ)). 
+* Fix some more analyzer tests [#54128](https://github.com/ClickHouse/ClickHouse/pull/54128) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Disable `01600_parts_types_metrics_long` for asan [#54132](https://github.com/ClickHouse/ClickHouse/pull/54132) ([Antonio Andelic](https://github.com/antonio2368)). +* Fixing 01086_odbc_roundtrip with analyzer. [#54133](https://github.com/ClickHouse/ClickHouse/pull/54133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add warnings about ingestion script speed and memory usage in Laion dataset instructions [#54153](https://github.com/ClickHouse/ClickHouse/pull/54153) ([Michael Kolupaev](https://github.com/al13n321)). +* tests: mark 02152_http_external_tables_memory_tracking as no-parallel [#54155](https://github.com/ClickHouse/ClickHouse/pull/54155) ([Azat Khuzhin](https://github.com/azat)). +* The external logs have had colliding arguments [#54165](https://github.com/ClickHouse/ClickHouse/pull/54165) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rename macro [#54169](https://github.com/ClickHouse/ClickHouse/pull/54169) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v23.8.2.7-lts.md b/docs/changelogs/v23.8.2.7-lts.md new file mode 100644 index 00000000000..317e2c6d56a --- /dev/null +++ b/docs/changelogs/v23.8.2.7-lts.md @@ -0,0 +1,18 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.8.2.7-lts (f73c8f37874) FIXME as compared to v23.8.1.2992-lts (ebc7d9a9f3b) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix: parallel replicas over distributed don't read from all replicas [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix: allow IPv6 for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* S3Queue is experimental [#54214](https://github.com/ClickHouse/ClickHouse/pull/54214) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md index be2c37f5f41..088dd6f2679 100644 --- a/docs/en/development/build-cross-s390x.md +++ b/docs/en/development/build-cross-s390x.md @@ -90,34 +90,117 @@ Process 1 stopped ## Visual Studio Code integration -- [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging, the [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). -- Make sure to set the backend to your llvm installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` -- Launcher: +- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging. +- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). +- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` +- Make sure to run the clickhouse executable in debug mode prior to launch. 
(It is also possible to create a `preLaunchTask` that automates this) + +### Example configurations +#### cmake-variants.yaml +```yaml +buildType: + default: relwithdebinfo + choices: + debug: + short: Debug + long: Emit debug information + buildType: Debug + release: + short: Release + long: Optimize generated code + buildType: Release + relwithdebinfo: + short: RelWithDebInfo + long: Release with Debug Info + buildType: RelWithDebInfo + tsan: + short: MinSizeRel + long: Minimum Size Release + buildType: MinSizeRel + +toolchain: + default: default + description: Select toolchain + choices: + default: + short: x86_64 + long: x86_64 + s390x: + short: s390x + long: s390x + settings: + CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake +``` + +#### launch.json ```json { "version": "0.2.0", "configurations": [ { - "name": "Debug", "type": "lldb", "request": "custom", - "targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"], - "processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"], - "sourceMap": { "${input:targetdir}": "${workspaceFolder}" }, - } - ], - "inputs": [ - { - "id": "targetdir", - "type": "command", - "command": "extension.commandvariable.transform", - "args": { - "text": "${command:cmake.launchTargetDirectory}", - "find": ".*/([^/]+)/[^/]+$", - "replace": "$1" - } + "name": "(lldb) Launch s390x with qemu", + "targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"], + "processCreateCommands": ["gdb-remote 2159"], + "preLaunchTask": "Run ClickHouse" + } + ] +} +``` + +#### settings.json +This would also put different builds under different subfolders of the `build` folder. +```json +{ + "cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}", + "lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so" +} +``` + +#### run-debug.sh +```sh +#! /bin/sh +echo 'Starting debugger session' +cd $1 +qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4 +``` + +#### tasks.json +Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`. +```json +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Run ClickHouse", + "type": "shell", + "isBackground": true, + "command": "${workspaceFolder}/.vscode/run-debug.sh", + "args": [ + "${command:cmake.launchTargetDirectory}/tmp", + "${command:cmake.launchTargetPath}", + "server", + "--config-file=${workspaceFolder}/programs/server/config.xml" + ], + "problemMatcher": [ + { + "pattern": [ + { + "regexp": ".", + "file": 1, + "location": 2, + "message": 3 + } + ], + "background": { + "activeOnStart": true, + "beginsPattern": "^Starting debugger session", + "endsPattern": ".*" + } + } + ] } ] } ``` -- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this) \ No newline at end of file diff --git a/docs/en/development/build.md b/docs/en/development/build.md index e3749608bbc..04dbc26aac1 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -14,6 +14,20 @@ Supported platforms: - PowerPC 64 LE (experimental) - RISC-V 64 (experimental) +## Building in docker +We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. 
There is a script `docker/packager/packager` to ease the image usage: + +```bash +# define a directory for the output artifacts +output_dir="build_results" +# a simplest build +./docker/packager/packager --package-type=binary --output-dir "$output_dir" +# build debian packages +./docker/packager/packager --package-type=deb --output-dir "$output_dir" +# by default, debian packages use thin LTO, so we can override it to speed up the build +CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" +``` + ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. @@ -28,20 +42,20 @@ sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-rel ### Install and Use the Clang compiler -On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/). +On Ubuntu/Debian, you can use LLVM's automatic installation script; see [here](https://apt.llvm.org/). ``` bash sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` -Note: in case of troubles, you can also use this: +Note: in case of trouble, you can also use this: ```bash sudo apt-get install software-properties-common sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test ``` -For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). +For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). As of April 2023, clang-16 or higher will work. GCC as a compiler is not supported. @@ -78,8 +92,12 @@ cmake -S . -B build cmake --build build # or: `cd build; ninja` ``` +:::tip +In case `cmake` isn't able to detect the number of available logical cores, the build will be done by one thread. To overcome this, you can tweak `cmake` to use a specific number of threads with `-j` flag, for example, `cmake --build build -j 16`. Alternatively, you can generate build files with a specific number of jobs in advance to avoid always setting the flag: `cmake -DPARALLEL_COMPILE_JOBS=16 -S . -B build`, where `16` is the desired number of threads. +::: + To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`). -This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments. +This will create an executable `build/programs/clickhouse`, which can be used with `client` or `server` arguments. ## Building on Any Linux {#how-to-build-clickhouse-on-any-linux} @@ -93,7 +111,7 @@ The build requires the following components: - Yasm - Gawk -If all the components are installed, you may build in the same way as the steps above. +If all the components are installed, you may build it in the same way as the steps above. Example for OpenSUSE Tumbleweed: @@ -109,7 +127,7 @@ Example for Fedora Rawhide: ``` bash sudo yum update -sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk +sudo yum --nogpg install git cmake make clang python3 ccache lld nasm yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git mkdir build cmake -S . 
-B build diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md index 0501c1cbdcb..4e01b41ab3c 100644 --- a/docs/en/development/building_and_benchmarking_deflate_qpl.md +++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md @@ -7,12 +7,8 @@ description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec # Build Clickhouse with DEFLATE_QPL -- Make sure your target machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) -- Pass the following flag to CMake when building ClickHouse: - -``` bash -cmake -DENABLE_QPL=1 .. -``` +- Make sure your host machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) +- deflate_qpl is enabled by default during the cmake build. In case you accidentally changed it, please double-check the build flag: ENABLE_QPL=1 - For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md) diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 738c5458cc3..c76ab738004 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -141,6 +141,10 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s Runs [integration tests](tests.md#integration-tests). +## Bugfix validate check +Checks that the pull request either adds a new test (functional or integration) or changes existing tests, and that these tests fail with the binary built on the master branch. This check is triggered when the pull request has the "pr-bugfix" label. + + ## Stress Test Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails: diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index f7cc52e622e..b7e567c7b6c 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -190,7 +190,7 @@ These are the schema conversion manipulations you can do with table overrides fo * Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl). * Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs). * Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias). - * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes) + * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable `use_skip_indexes_if_final` setting to make them work (MaterializedMySQL is using `SELECT ... FINAL` by default) * Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here. `INDEX ...
TYPE hypothesis` as [described in the v21.12 blog post]](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/) diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 5672633c4a2..1f90f2acabf 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -35,7 +35,7 @@ The [system.clusters](../../operations/system-tables/clusters.md) system table c When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log — it checks its local metadata with the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata). -[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`. +[`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`. ## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index bd704d0e87e..b024820024a 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -60,6 +60,7 @@ Engines in the family: - [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) - [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) - [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) +- [S3Queue](../../engines/table-engines/integrations/s3queue.md) ### Special Engines {#special-engines} diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index 60e448377d0..3df08ee2ffb 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -21,7 +21,7 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32) - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name -- `blobpath` - file path. 
Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `account_name` - if storage_account_url is used, then account name can be specified here - `account_key` - if storage_account_url is used, then account key can be specified here - `format` — The [format](/docs/en/interfaces/formats.md) of the file. diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index b562e9d7fe6..964c952f31a 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -22,7 +22,7 @@ CREATE TABLE deltalake - `url` — Bucket url with path to the existing Delta Lake table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. -Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Example** diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index c60618af289..b2f599e5c92 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -22,7 +22,7 @@ CREATE TABLE hudi_table - `url` — Bucket url with the path to an existing Hudi table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. -Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). 
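For illustration only (this is not part of the patch): a minimal sketch of how a named collection could be used with the Hudi engine, assuming a collection named `hudi_conf` (a hypothetical name) has already been defined in the server configuration with `url`, `access_key_id` and `secret_access_key` entries:

```sql
-- assumes a pre-defined named collection `hudi_conf` (hypothetical name) that
-- supplies the bucket url and the AWS credentials for the engine
CREATE TABLE hudi_table
ENGINE = Hudi(hudi_conf);
```

The same pattern applies to the DeltaLake engine above, since both engines accept the same connection parameters.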
**Example** diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index b81d5624c1a..e4d3ac762ed 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -173,6 +173,7 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u cgrp smallest + 600 @@ -260,3 +261,4 @@ The number of rows in one Kafka message depends on whether the format is row-bas - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) - [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) +- [system.kafka_consumers](../../../operations/system-tables/kafka_consumers.md) diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index bccafd67c2c..47dae2ed494 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -13,7 +13,7 @@ If more than one table is required, it is highly recommended to use the [Materia ``` sql CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64) -ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password') +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_table', 'postgres_user', 'postgres_password') PRIMARY KEY key; ``` diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 0e2b48ef6a6..2967a15494c 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -37,7 +37,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ### Engine parameters -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). @@ -57,7 +57,8 @@ Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter tok :::note As shown in the example, querying from S3 tables that are partitioned is -not directly supported at this time, but can be accomplished by querying the bucket contents with a wildcard. +not directly supported at this time, but can be accomplished by querying the individual partitions +using the S3 table function. 
The primary use-case for writing partitioned data in S3 is to enable transferring that data into another @@ -127,23 +128,7 @@ FROM s3('http://minio:10000/clickhouse//test_45.csv', 'minioadmin', 'minioadminp └────┴────┴────┘ ``` -#### Select from all partitions - -```sql -SELECT * -FROM s3('http://minio:10000/clickhouse//**', 'minioadmin', 'minioadminpassword', 'CSV') -``` -```response -┌─c1─┬─c2─┬─c3─┐ -│ 3 │ 2 │ 1 │ -└────┴────┴────┘ -┌─c1─┬─c2─┬─c3─┐ -│ 1 │ 2 │ 3 │ -└────┴────┴────┘ -┌─c1─┬─c2─┬─c3─┐ -│ 78 │ 43 │ 45 │ -└────┴────┴────┘ -``` +#### Limitation You may naturally try to `Select * from p`, but as noted above, this query will fail; use the preceding query. @@ -179,6 +164,7 @@ For more information about virtual columns see [here](../../../engines/table-eng `path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). - `*` — Substitutes any number of any characters except `/` including empty string. +- `**` — Substitutes any number of any character include `/` including empty string. - `?` — Substitutes any single character. - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. @@ -252,7 +238,7 @@ The following settings can be set before query execution or placed into configur - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. - `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default values is `2`. -- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`. +- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`. - `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. 
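As an illustration of the `**` glob added above, a query along the following lines reads every object under a prefix, including objects in nested "directories" — which is also the practical way to scan all partitions written by a partitioned S3 table. This is only a sketch: the bucket URL is a placeholder and anonymous access to it is assumed.

```sql
-- count rows per file across all nested paths under data/
SELECT _file, count() AS rows
FROM s3('https://my-bucket.s3.amazonaws.com/data/**.csv', 'CSV')
GROUP BY _file
ORDER BY _file;
```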
diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md
new file mode 100644
index 00000000000..8988d8cde62
--- /dev/null
+++ b/docs/en/engines/table-engines/integrations/s3queue.md
@@ -0,0 +1,225 @@
+---
+slug: /en/engines/table-engines/integrations/s3queue
+sidebar_position: 7
+sidebar_label: S3Queue
+---
+
+# S3Queue Table Engine
+This engine provides integration with the [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming import from S3. The engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md) and [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features.
+
+## Create Table {#creating-a-table}
+
+``` sql
+CREATE TABLE s3_queue_engine_table (name String, value UInt32)
+    ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
+    [SETTINGS]
+    [mode = 'unordered',]
+    [after_processing = 'keep',]
+    [keeper_path = '',]
+    [s3queue_loading_retries = 0,]
+    [s3queue_polling_min_timeout_ms = 1000,]
+    [s3queue_polling_max_timeout_ms = 10000,]
+    [s3queue_polling_backoff_ms = 0,]
+    [s3queue_tracked_files_limit = 1000,]
+    [s3queue_tracked_file_ttl_sec = 0,]
+    [s3queue_polling_size = 50,]
+```
+
+**Engine parameters**
+
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
+- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
+- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
+- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
+- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
+
+**Example**
+
+```sql
+CREATE TABLE s3queue_engine_table (name String, value UInt32)
+ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
+SETTINGS
+    mode = 'ordered';
+```
+
+Using named collections:
+
+``` xml
+<clickhouse>
+    <named_collections>
+        <s3queue_conf>
+            <url>https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*</url>
+            <access_key_id>test</access_key_id>
+            <secret_access_key>test</secret_access_key>
+        </s3queue_conf>
+    </named_collections>
+</clickhouse>
+```
+
+```sql
+CREATE TABLE s3queue_engine_table (name String, value UInt32)
+ENGINE=S3Queue(s3queue_conf, format = 'CSV', compression_method = 'gzip')
+SETTINGS
+    mode = 'ordered';
+```
+
+## Settings {#s3queue-settings}
+
+### mode {#mode}
+
+Possible values:
+
+- unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper.
+- ordered — With ordered mode, only the maximum name of the successfully consumed file and the names of files that will be retried after an unsuccessful loading attempt are stored in ZooKeeper.
+
+Default value: `unordered`.
+
+### after_processing {#after_processing}
+
+Delete or keep the file after successful processing.
+Possible values:

+- keep.
+- delete.
+
+Default value: `keep`.
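For example, a queue that removes each source file from the bucket once it has been imported could be declared roughly like this — a sketch reusing the bucket from the examples above and assuming the provided credentials allow deletes:

```sql
CREATE TABLE s3queue_delete_after_import (name String, value UInt32)
ENGINE = S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
    mode = 'unordered',
    after_processing = 'delete';
```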
+
+### keeper_path {#keeper_path}
+
+The path in ZooKeeper can be specified as a table engine setting; otherwise, a default path is formed from the path provided in the global configuration and the table UUID.
+Possible values:
+
+- String.
+
+Default value: `/`.
+
+### s3queue_loading_retries {#s3queue_loading_retries}
+
+Retry file loading up to the specified number of times. By default, there are no retries.
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+### s3queue_polling_min_timeout_ms {#s3queue_polling_min_timeout_ms}
+
+Minimal timeout before the next polling attempt (in milliseconds).
+
+Possible values:
+
+- Positive integer.
+
+Default value: `1000`.
+
+### s3queue_polling_max_timeout_ms {#s3queue_polling_max_timeout_ms}
+
+Maximum timeout before the next polling attempt (in milliseconds).
+
+Possible values:
+
+- Positive integer.
+
+Default value: `10000`.
+
+### s3queue_polling_backoff_ms {#s3queue_polling_backoff_ms}
+
+Polling backoff (in milliseconds).
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+### s3queue_tracked_files_limit {#s3queue_tracked_files_limit}
+
+Limits the number of tracked files (and thus ZooKeeper nodes) when the 'unordered' mode is used; it has no effect in 'ordered' mode.
+If the limit is reached, the oldest processed files are deleted from the ZooKeeper node and may be processed again.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `1000`.
+
+### s3queue_tracked_file_ttl_sec {#s3queue_tracked_file_ttl_sec}
+
+Maximum number of seconds to keep processed files in the ZooKeeper node (kept forever by default) when the 'unordered' mode is used; it has no effect in 'ordered' mode.
+After the specified number of seconds, the file will be re-imported.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+### s3queue_polling_size {#s3queue_polling_size}
+
+Maximum number of files to fetch from S3 per `SELECT` or background task.
+The engine takes files for processing from S3 in batches.
+Limiting the batch size increases concurrency when multiple table engines with the same `keeper_path` consume files from the same path.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `50`.
+
+
+## S3-related Settings {#s3-settings}
+
+The engine supports all S3-related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md).
+
+
+## Description {#description}
+
+`SELECT` is not particularly useful for streaming import (except for debugging), because each file can be imported only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this:
+
+1. Use the engine to create a table for consuming from the specified path in S3 and consider it a data stream.
+2. Create a table with the desired structure.
+3. Create a materialized view that converts data from the engine and puts it into a previously created table.
+
+When the `MATERIALIZED VIEW` is attached to the engine table, it starts collecting data in the background.
+
+Example:
+
+``` sql
+  CREATE TABLE s3queue_engine_table (name String, value UInt32)
+    ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
+    SETTINGS
+        mode = 'unordered',
+        keeper_path = '/clickhouse/s3queue/';
+
+  CREATE TABLE stats (name String, value UInt32)
+    ENGINE = MergeTree() ORDER BY name;
+
+  CREATE MATERIALIZED VIEW consumer TO stats
+    AS SELECT name, value FROM s3queue_engine_table;
+
+  SELECT * FROM stats ORDER BY name;
+```
+
+## Virtual columns {#virtual-columns}
+
+- `_path` — Path to the file.
+- `_file` — Name of the file. + +For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). + + +## Wildcards In Path {#wildcards-in-path} + +`path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). + +- `*` — Substitutes any number of any characters except `/` including empty string. +- `**` — Substitutes any number of any characters include `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. + +Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. + +:::note +If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 4a4ebb47bdc..b75a49d1cd1 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -1,4 +1,4 @@ -# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex} +# Approximate Nearest Neighbor Search Indexes [experimental] Nearest neighborhood search is the problem of finding the M closest points for a given point in an N-dimensional vector space. The most straightforward approach to solve this problem is a brute force search where the distance between all points in the vector space and the @@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows: ``` sql SELECT * -FROM table +FROM table_with_ann_index ORDER BY Distance(vectors, Point) LIMIT N ``` @@ -32,7 +32,7 @@ An alternative formulation of the nearest neighborhood search problem looks as f ``` sql SELECT * -FROM table +FROM table_with_ann_index WHERE Distance(vectors, Point) < MaxDistance LIMIT N ``` @@ -45,12 +45,12 @@ With brute force search, both queries are expensive (linear in the number of poi `Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time). -# Creating and Using ANN Indexes +# Creating and Using ANN Indexes {#creating_using_ann_indexes} Syntax to create an ANN index over an [Array](../../../sql-reference/data-types/array.md) column: ```sql -CREATE TABLE table +CREATE TABLE table_with_ann_index ( `id` Int64, `vectors` Array(Float32), @@ -63,7 +63,7 @@ ORDER BY id; Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: ```sql -CREATE TABLE table +CREATE TABLE table_with_ann_index ( `id` Int64, `vectors` Tuple(Float32[, Float32[, ...]]), @@ -83,7 +83,7 @@ ANN indexes support two types of queries: ``` sql SELECT * - FROM table + FROM table_with_ann_index [WHERE ...] 
ORDER BY Distance(vectors, Point) LIMIT N @@ -93,7 +93,7 @@ ANN indexes support two types of queries: ``` sql SELECT * - FROM table + FROM table_with_ann_index WHERE Distance(vectors, Point) < MaxDistance LIMIT N ``` @@ -103,7 +103,7 @@ To avoid writing out large vectors, you can use [query parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g. ```bash -clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" +clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" ``` ::: @@ -138,17 +138,19 @@ back to a smaller `GRANULARITY` values only in case of problems like excessive m was specified for ANN indexes, the default value is 100 million. -# Available ANN Indexes +# Available ANN Indexes {#available_ann_indexes} - [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) +- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch) + ## Annoy {#annoy} Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently disabled on ARM due to memory safety problems with the algorithm. -This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the -space in random linear surfaces (lines in 2D, planes in 3D etc.). +This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random +linear surfaces (lines in 2D, planes in 3D etc.).
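As a minimal sketch of what this looks like in a table definition — assuming the generic ANN index syntax shown above and the parameter-free `annoy()` form used elsewhere in these docs (the table and index names are illustrative):

```sql
SET allow_experimental_annoy_index = 1;

CREATE TABLE table_with_annoy_index
(
    id Int64,
    vectors Array(Float32),
    INDEX ann_idx vectors TYPE annoy() -- annoy() uses L2Distance by default
)
ENGINE = MergeTree
ORDER BY id;
```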
+
+ +Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column: + +```sql +CREATE TABLE table_with_usearch_index +( + id Int64, + vectors Array(Float32), + INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N] +) +ENGINE = MergeTree +ORDER BY id; +``` + +Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: + +```sql +CREATE TABLE table_with_usearch_index +( + id Int64, + vectors Tuple(Float32[, Float32[, ...]]), + INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N] +) +ENGINE = MergeTree +ORDER BY id; +``` + +USearch currently supports two distance functions: +- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space + ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). +- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors + ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). + +For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no +distance function was specified during index creation, `L2Distance` is used as default. \ No newline at end of file diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 4f506126682..89c5a499e07 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1139,6 +1139,8 @@ Optional parameters: - `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). ### Configuring the cache @@ -1251,12 +1253,14 @@ Other parameters: * `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`. * `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. +* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). Examples of working configurations can be found in integration tests directory (see e.g. 
[test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). - :::note Zero-copy replication is not ready for production - Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. - ::: +:::note Zero-copy replication is not ready for production +Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. +::: ## HDFS storage {#hdfs-storage} diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index f7d84b9b452..a3bb11117cd 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -13,7 +13,7 @@ A recommended alternative to the Buffer Table Engine is enabling [asynchronous i ::: ``` sql -Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) +Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) ``` ### Engine parameters: diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index 9fcdb47e555..13efd3e444b 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -23,7 +23,7 @@ array or map columns and strings correspondingly in generated data. Generate table engine supports only `SELECT` queries. -It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`. +It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `AggregateFunction`. ## Example {#example} diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 26d4975954f..5a5e1564180 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md index 077adf016a3..0dbaceffc13 100644 --- a/docs/en/getting-started/example-datasets/laion.md +++ b/docs/en/getting-started/example-datasets/laion.md @@ -1,23 +1,21 @@ # Laion-400M dataset -The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar. 
+The [Laion-400M dataset](https://laion.ai/blog/laion-400-open-dataset/) contains 400 million images with English image captions. Laion nowadays provides [an even larger dataset](https://laion.ai/blog/laion-5b/) but working with it will be similar. -The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md). +The dataset contains the image URL, embeddings for both the image and the image caption, a similarity score between the image and the image caption, as well as metadata, e.g. the image width/height, the licence and a NSFW flag. We can use the dataset to demonstrate [approximate nearest neighbor search](../../engines/table-engines/mergetree-family/annindexes.md) in ClickHouse. -## Prepare data +## Data preparation -Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data. - -Download data and process it with simple `download.sh` script: +The embeddings and the metadata are stored in separate files in the raw data. A data preparation step downloads the data, merges the files, +converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that: ```bash -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy -python3 process.py ${1} +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata +python3 process.py ${1} # merge files and convert to CSV ``` - -Where `process.py`: +Script `process.py` is defined as follows: ```python import pandas as pd @@ -35,11 +33,11 @@ im_emb = np.load(npy_file) text_emb = np.load(text_npy) data = pd.read_parquet(metadata_file) -# combine them +# combine files data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False) -# you can save more columns -data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]] +# columns to be imported into ClickHouse +data = data[['url', 'caption', 'NSFW', 'similarity', "image_embedding", "text_embedding"]] # transform np.arrays to lists data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x)) @@ -48,30 +46,34 @@ data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x)) # this small hack is needed becase caption sometimes contains all kind of quotes data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " ")) -# save data to file +# export data as CSV file data.to_csv(str_i + '.csv', header=False) -# previous files can be removed +# removed raw data files os.system(f"rm {npy_file} {metadata_file} {text_npy}") ``` -You can download data with +To start the data preparation pipeline, run: + ```bash -seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}' +seq 0 409 | xargs -P1 -I{} bash -c './download.sh {}' ``` -The 
dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits. +The dataset is split into 410 files, each file contains ca. 1 million rows. If you like to work with a smaller subset of the data, simply adjust the limits, e.g. `seq 0 9 | ...`. -## Create table for laion +(The python script above is very slow (~2-10 minutes per file), takes a lot of memory (41 GB per file), and the resulting csv files are big (10 GB each), so be careful. If you have enough RAM, increase the `-P1` number for more parallelism. If this is still too slow, consider coming up with a better ingestion procedure - maybe converting the .npy files to parquet, then doing all the other processing with clickhouse.) -Without indexes table can be created by +## Create table + +To create a table without indexes, run: ```sql -CREATE TABLE laion_dataset +CREATE TABLE laion ( `id` Int64, `url` String, `caption` String, + `NSFW` String, `similarity` Float32, `image_embedding` Array(Float32), `text_embedding` Array(Float32) @@ -81,23 +83,23 @@ ORDER BY id SETTINGS index_granularity = 8192 ``` -Fill table with data: +To import the CSV files into ClickHouse: ```sql -INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv' +INSERT INTO laion FROM INFILE '{path_to_csv_files}/*.csv' ``` -## Check data in table without indexes +## Run a brute-force ANN search (without ANN index) -Let's check the work of the following query on the part of the dataset (8 million records): +To run a brute-force approximate nearest neighbor search, run: ```sql -select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30 +SELECT url, caption FROM laion ORDER BY L2Distance(image_embedding, {target:Array(Float32)}) LIMIT 30 ``` -Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy to get images that are more likely to satisfy our queries. The client parameter `target`, which is an array of 512 elements. See later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as a target vector. +`target` is an array of 512 elements and a client parameter. A convenient way to obtain such arrays will be presented at the end of the article. For now, we can run the embedding of a random cat picture as `target`. -**The result** +**Result** ``` ┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐ @@ -114,32 +116,34 @@ Since the embeddings for images and texts may not match, let's also require a ce 8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.) ``` -## Add indexes +## Run a ANN with an ANN index -Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md). 
+Create a new table with an ANN index and insert the data from the existing table: ```sql -CREATE TABLE laion_dataset +CREATE TABLE laion_annoy ( `id` Int64, `url` String, `caption` String, + `NSFW` String, `similarity` Float32, `image_embedding` Array(Float32), `text_embedding` Array(Float32), - INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000, - INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000 + INDEX annoy_image image_embedding TYPE annoy(), + INDEX annoy_text text_embedding TYPE annoy() ) ENGINE = MergeTree ORDER BY id -SETTINGS index_granularity = 8192 +SETTINGS index_granularity = 8192; + +INSERT INTO laion_annoy SELECT * FROM laion; ``` -When created, the index will be built by L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search. -Now let's check again with the same query: +By default, Annoy indexes use the L2 distance as metric. Further tuning knobs for index creation and search are described in the Annoy index [documentation](../../engines/table-engines/mergetree-family/annindexes.md). Let's check now again with the same query: ```sql -select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8 +SELECT url, caption FROM laion_annoy ORDER BY l2Distance(image_embedding, {target:Array(Float32)}) LIMIT 8 ``` **Result** @@ -159,15 +163,18 @@ select url, caption from test_indexes_laion where similarity > 0.2 order by L2Di 8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.) ``` -The speed has increased significantly. But now, the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching. +The speed increased significantly at the cost of less accurate results. This is because the ANN index only provide approximate search results. Note the example searched for similar image embeddings, yet it is also possible to search for positive image caption embeddings. -## Scripts for embeddings +## Creating embeddings with UDFs -Usually, we do not want to get embeddings from existing data, but to get them for new data and look for similar ones in old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts will be written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build embeddings in the dataset and for new objects using the same model. +One usually wants to create embeddings for new images or new image captions and search for similar image / image caption pairs in the data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) to create the `target` vector without leaving the client. It is important to use the same model to create the data and new embeddings for searches. The following scripts utilize the `ViT-B/32` model which also underlies the dataset. 
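Putting the pieces together, the intended workflow — once the `encode_text` UDF from the next subsection is in place — looks roughly like the sketch below (the caption is made up, and the pasted parameter value is abbreviated):

```sql
-- 1. Compute an embedding for a new caption; the first call is slow because the model is loaded once.
SELECT encode_text('a small cat sitting on a keyboard');

-- 2. Copy the returned 512-element array into a client parameter:
--      SET param_target = [<the array printed by the previous query>]
-- 3. Reuse it in the ANN search from above, this time against the caption embeddings:
SELECT url, caption
FROM laion_annoy
ORDER BY L2Distance(text_embedding, {target:Array(Float32)})
LIMIT 8;
```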
### Text embeddings +First, store the following Python script in the `user_scripts/` directory of your ClickHouse data path and make it executable (`chmod +x encode_text.py`). + `encode_text.py`: + ```python #!/usr/bin/python3 import clip @@ -182,10 +189,12 @@ if __name__ == '__main__': inputs = clip.tokenize(text) with torch.no_grad(): text_features = model.encode_text(inputs)[0].tolist() + print(text_features) sys.stdout.flush() ``` -`encode_text_function.xml`: +Then create `encode_text_function.xml` in a location referenced by `/path/to/*_function.xml` in your ClickHouse server configuration file. + ```xml @@ -203,19 +212,19 @@ if __name__ == '__main__': ``` -Now we can simply use: +You can now simply use: ```sql SELECT encode_text('cat'); ``` - -The first use will be slow because the model needs to be loaded. But repeated queries will be fast. Then we copy the results to ``set param_target=...`` and can easily write queries +The first run will be slow because it loads the model, but repeated runs will be fast. We can then copy the output to `SET param_target=...` and can easily write queries. ### Image embeddings -For pictures, the process is similar, but you send the path instead of the picture (if necessary, you can implement a download picture with processing, but it will take longer) +Image embeddings can be created similarly but we will provide the Python script the path to a local image instead of the image caption text. + +`encode_image.py` -`encode_picture.py` ```python #!/usr/bin/python3 import clip @@ -231,29 +240,31 @@ if __name__ == '__main__': image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device) with torch.no_grad(): image_features = model.encode_image(image)[0].tolist() - print(image_features) + print(image_features) sys.stdout.flush() ``` -`encode_picture_function.xml` +`encode_image_function.xml` + ```xml executable_pool - encode_picture + encode_image Array(Float32) String path TabSeparated - encode_picture.py + encode_image.py 1000000 ``` -The query: +Then run this query: + ```sql -SELECT encode_picture('some/path/to/your/picture'); +SELECT encode_image('/path/to/your/image'); ``` diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 8779dd1a544..f018f3a248e 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -30,7 +30,7 @@ It may lack support for new features. ## Usage {#cli_usage} -The client can be used in interactive and non-interactive (batch) mode. +The client can be used in interactive and non-interactive (batch) mode. ### Gather your connection details @@ -177,8 +177,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--user, -u` – The username. Default value: default. - `--password` – The password. Default value: empty string. - `--ask-password` - Prompt the user to enter a password. -- `--query, -q` – The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`. -- `--queries-file` – file path with queries to execute. Cannot be used simultaneously with `--query`. +- `--query, -q` – The query to process when using non-interactive mode. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`. +- `--queries-file` – file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`. 
- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. - `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter). - `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). @@ -323,9 +323,9 @@ clickhouse-client clickhouse://192.168.1.15,192.168.1.25 `clickhouse-client` uses the first existing file of the following: - Defined in the `--config-file` parameter. -- `./clickhouse-client.xml` -- `~/.clickhouse-client/config.xml` -- `/etc/clickhouse-client/config.xml` +- `./clickhouse-client.xml`, `.yaml`, `.yml` +- `~/.clickhouse-client/config.xml`, `.yaml`, `.yml` +- `/etc/clickhouse-client/config.xml`, `.yaml`, `.yml` Example of a config file: @@ -342,6 +342,17 @@ Example of a config file: ``` +Or the same config in a YAML format: + +```yaml +user: username +password: 'password' +secure: true +openSSL: + client: + caConfig: '/etc/ssl/cert.pem' +``` + ### Query ID Format {#query-id-format} In interactive mode `clickhouse-client` shows query ID for every query. By default, the ID is formatted like this: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 15f9d1f47bf..d059d158d54 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -11,82 +11,83 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: | Format | Input | Output | -|-------------------------------------------------------------------------------------------|------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | -| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | -| [Template](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | -| [CustomSeparated](#format-customseparated) | ✔ | ✔ | -| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | -| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | -| [SQLInsert](#sqlinsert) | ✗ | ✔ | -| [Values](#data-format-values) | ✔ | ✔ | -| [Vertical](#vertical) | ✗ | ✔ | -| [JSON](#json) | ✔ | ✔ | -| [JSONAsString](#jsonasstring) | ✔ | ✗ | -| [JSONStrings](#jsonstrings) | ✔ | ✔ | -| [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | -| [JSONCompact](#jsoncompact) | ✔ | ✔ | -| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | -| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | -| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | -| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | -| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | -| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | -| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | 
-| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | -| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [Pretty](#pretty) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ | -| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ | -| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ | -| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ | -| [Prometheus](#prometheus) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [Avro](#data-format-avro) | ✔ | ✔ | -| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | -| [Parquet](#data-format-parquet) | ✔ | ✔ | -| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | -| [Arrow](#data-format-arrow) | ✔ | ✔ | -| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✔ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | -| [Native](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✔ | -| [Regexp](#data-format-regexp) | ✔ | ✗ | -| [RawBLOB](#rawblob) | ✔ | ✔ | -| [MsgPack](#msgpack) | ✔ | ✔ | -| [MySQLDump](#mysqldump) | ✔ | ✗ | -| [Markdown](#markdown) | ✗ | ✔ | +|-------------------------------------------------------------------------------------------|------|-------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | +| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | +| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | +| [SQLInsert](#sqlinsert) | ✗ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [JSON](#json) | ✔ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONStrings](#jsonstrings) | ✔ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | +| [JSONCompact](#jsoncompact) | ✔ | ✔ | +| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| 
[PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | +| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ | +| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ | +| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ | +| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ | +| [Prometheus](#prometheus) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✔ | +| [One](#data-format-one) | ✔ | ✗ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✔ | +| [LineAsString](#lineasstring) | ✔ | ✔ | +| [Regexp](#data-format-regexp) | ✔ | ✗ | +| [RawBLOB](#rawblob) | ✔ | ✔ | +| [MsgPack](#msgpack) | ✔ | ✔ | +| [MySQLDump](#mysqldump) | ✔ | ✗ | +| [Markdown](#markdown) | ✗ | ✔ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section. @@ -195,6 +196,7 @@ SELECT * FROM nestedt FORMAT TSV - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. 
+- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`. ## TabSeparatedRaw {#tabseparatedraw} @@ -472,7 +474,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. -- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. +- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. ## CSVWithNames {#csvwithnames} @@ -501,9 +503,10 @@ the types from input data will be compared with the types of the corresponding c Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings. -If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any. 
- -If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped. +Additional settings: +- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`. +- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file . Default value - `false`. +- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces). @@ -1261,6 +1264,7 @@ SELECT * FROM json_each_row_nested - [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. - [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`. +- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`. - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. @@ -1723,6 +1727,34 @@ You can select data from a ClickHouse table and save them into some file in the ``` bash $ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS format_schema = 'schema:Message'" ``` + +### Using autogenerated schema {#using-autogenerated-capn-proto-schema} + +If you don't have an external CapnProto schema for your data, you can still output/input data in CapnProto format using autogenerated schema. 
+For example: + +```sql +SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1 +``` + +In this case ClickHouse will autogenerate CapnProto schema according to the table structure using function [structureToCapnProtoSchema](../sql-reference/functions/other-functions.md#structure_to_capn_proto_schema) and will use this schema to serialize data in CapnProto format. + +You can also read CapnProto file with autogenerated schema (in this case the file must be created using the same schema): + +```bash +$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_capn_proto_use_autogenerated_schema=1 FORMAT CapnProto" +``` + +The setting [format_capn_proto_use_autogenerated_schema](../operations/settings/settings-formats.md#format_capn_proto_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set. + +You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example: + +```sql +SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.capnp' +``` + +In this case autogenerated CapnProto schema will be saved in file `path/to/schema/schema.capnp`. + ## Prometheus {#prometheus} Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format). @@ -1861,6 +1893,33 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). +### Using autogenerated schema {#using-autogenerated-protobuf-schema} + +If you don't have an external Protobuf schema for your data, you can still output/input data in Protobuf format using autogenerated schema. +For example: + +```sql +SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1 +``` + +In this case ClickHouse will autogenerate Protobuf schema according to the table structure using function [structureToProtobufSchema](../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) and will use this schema to serialize data in Protobuf format. + +You can also read Protobuf file with autogenerated schema (in this case the file must be created using the same schema): + +```bash +$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_protobuf_use_autogenerated_schema=1 FORMAT Protobuf" +``` + +The setting [format_protobuf_use_autogenerated_schema](../operations/settings/settings-formats.md#format_protobuf_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set. + +You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). 
For example: + +```sql +SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.proto' +``` + +In this case autogenerated Protobuf schema will be saved in file `path/to/schema/schema.proto`. + ## ProtobufSingle {#protobufsingle} Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters. @@ -2080,6 +2139,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. +- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. Default value - `8192`. - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`. @@ -2281,7 +2341,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data.
Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. @@ -2347,7 +2406,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. - [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. @@ -2355,6 +2413,34 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). +## One {#data-format-one} + +Special input format that doesn't read any data from file and returns only one row with column of type `UInt8`, name `dummy` and value `0` (like `system.one` table). +Can be used with virtual columns `_file/_path` to list all files without reading actual data. + +Example: + +Query: +```sql +SELECT _file FROM file('path/to/files/data*', One); +``` + +Result: +```text +┌─_file────┐ +│ data.csv │ +└──────────┘ +┌─_file──────┐ +│ data.jsonl │ +└────────────┘ +┌─_file────┐ +│ data.tsv │ +└──────────┘ +┌─_file────────┐ +│ data.parquet │ +└──────────────┘ +``` + ## LineAsString {#lineasstring} In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 3a7f6d4d854..8789fe836cb 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -18,6 +18,8 @@ $ curl 'http://localhost:8123/' Ok. ``` +Also see: [HTTP response codes caveats](#http_response_codes_caveats). + Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. 
Please refer this [documentation](https://curl.se/download.html) to install it before running the examples. Web UI can be accessed here: `http://localhost:8123/play`. @@ -56,7 +58,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334,"peak_memory_usage":"0"} 1 ``` @@ -286,9 +288,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"} ``` Possible header fields: @@ -323,6 +325,27 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000& Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client-side, the error can only be detected at the parsing stage. +## HTTP response codes caveats {#http_response_codes_caveats} + +Because of limitation of HTTP protocol, HTTP 200 response code does not guarantee that a query was successful. + +Here is an example: + +``` +curl -v -Ss "http://localhost:8123/?max_block_size=1&query=select+sleepEachRow(0.001),throwIf(number=2)from+numbers(5)" +* Trying 127.0.0.1:8123... +... +< HTTP/1.1 200 OK +... +Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(equals(number, 2) :: 1) -> throwIf(equals(number, 2)) +``` + +The reason for this behavior is the nature of the HTTP protocol. The HTTP header is sent first with an HTTP code of 200, followed by the HTTP body, and then the error is injected into the body as plain text. +This behavior is independent of the format used, whether it's `Native`, `TSV`, or `JSON`; the error message will always be in the middle of the response stream. +You can mitigate this problem by enabling `wait_end_of_query=1` ([Response Buffering](#response-buffering)). In this case, the sending of the HTTP header is delayed until the entire query is resolved. 
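+
+For illustration, with buffering enabled the same request may return an HTTP error status instead of `200`. This is only a sketch: the exact status code and error text shown below are illustrative and depend on your server version and settings:
+
+```
+curl -v -Ss "http://localhost:8123/?wait_end_of_query=1&max_block_size=1&query=select+sleepEachRow(0.001),throwIf(number=2)from+numbers(5)"
+* Trying 127.0.0.1:8123...
+...
+< HTTP/1.1 500 Internal Server Error
+...
+Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(equals(number, 2) :: 1) -> throwIf(equals(number, 2))
+```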
+However, this does not completely solve the problem because the result must still fit within the `http_response_buffer_size`, and other settings like `send_progress_in_http_headers` can interfere with the delay of the header. +The only way to catch all errors is to analyze the HTTP body before parsing it using the required format. + ### Queries with Parameters {#cli-queries-with-parameters} You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters). @@ -416,7 +439,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -581,7 +604,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -621,7 +644,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -673,7 +696,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -692,7 +715,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/en/interfaces/images/mysql1.png b/docs/en/interfaces/images/mysql1.png new file mode 100644 index 00000000000..f5ac85b6e2c Binary files /dev/null and b/docs/en/interfaces/images/mysql1.png differ diff --git a/docs/en/interfaces/images/mysql2.png b/docs/en/interfaces/images/mysql2.png new file mode 100644 index 00000000000..7b999e41665 Binary files /dev/null and b/docs/en/interfaces/images/mysql2.png differ diff --git a/docs/en/interfaces/images/mysql3.png b/docs/en/interfaces/images/mysql3.png new file mode 100644 index 00000000000..be6cb963003 Binary files /dev/null and b/docs/en/interfaces/images/mysql3.png differ diff --git a/docs/en/interfaces/images/mysql4.png b/docs/en/interfaces/images/mysql4.png new file mode 100644 index 00000000000..3b5ce1e844d Binary files /dev/null and b/docs/en/interfaces/images/mysql4.png differ diff --git a/docs/en/interfaces/images/mysql5.png b/docs/en/interfaces/images/mysql5.png new file mode 100644 index 00000000000..fc026a8b753 Binary files /dev/null and b/docs/en/interfaces/images/mysql5.png differ diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index fab3ba42758..32c612dfa5f 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -6,7 +6,38 @@ sidebar_label: MySQL Interface # MySQL Interface -ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder: +ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-compatible to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)). + +## Enabling the MySQL Interface On ClickHouse Cloud + +:::note +The MySQL interface for ClickHouse Cloud is currently in private preview. Please contact support@clickhouse.com to enable this feature for your ClickHouse Cloud service. +::: + +1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab + +![Credentials screen - Prompt](./images/mysql1.png) + +2. Toggle the switch to enable the MySQL interface for this specific service. 
This will expose port `3306` for this service and prompt you with your MySQL connection screen that includes your unique MySQL username. The password will be the same as the service's default user password. + +![Credentials screen - Enabled MySQL](./images/mysql2.png) + +Alternatively, in order to enable the MySQL interface for an existing service: + +1. Ensure your service is in the `Running` state, then click on the "View connection string" button for the service you want to enable the MySQL interface for + +![Connection screen - Prompt MySQL](./images/mysql3.png) + +2. Toggle the switch to enable the MySQL interface for this specific service. This will prompt you to enter the default password. + +![Connection screen - Prompt MySQL](./images/mysql4.png) + +3. After entering the password, you will be presented with the MySQL connection string for this service +![Connection screen - MySQL Enabled](./images/mysql5.png) + +## Enabling the MySQL Interface On Self-managed ClickHouse + +Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files): ``` xml @@ -20,7 +51,7 @@ Startup your ClickHouse server and look for a log message similar to the followi {} Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004 ``` -## Connect mysql to ClickHouse +## Connect MySQL to ClickHouse The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse: diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index 8f16dcf5f83..e60aff927c4 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -21,6 +21,11 @@ In most cases it is recommended to use an appropriate tool or library instead of - [ODBC driver](../interfaces/odbc.md) - [C++ client library](../interfaces/cpp.md) +ClickHouse server provides embedded visual interfaces for power users: + +- Play UI: open `/play` in the browser; +- Advanced Dashboard: open `/dashboard` in the browser; + There are also a wide range of third-party libraries for working with ClickHouse: - [Client libraries](../interfaces/third-party/client-libraries.md) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index a4c7eb61e50..3022d5e05e9 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -228,8 +228,8 @@ For most input formats schema inference reads some data to determine its structu To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache. There are special settings that control this cache: -- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
+- `schema_inference_use_cache_for_{file,s3,hdfs,url,azure}` - allows turning on/off using cache for schema inference. These settings can be used in queries. The schema of the file can be changed by modifying the data or by changing format settings. For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file. diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 3e1b1e84f5d..a9f1af93495 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -83,8 +83,8 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and - Python - [SQLAlchemy](https://www.sqlalchemy.org) - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pandas](https://pandas.pydata.org) - - [pandahouse](https://github.com/kszucs/pandahouse) + - [PyArrow/Pandas](https://pandas.pydata.org) + - [Ibis](https://github.com/ibis-project/ibis) - PHP - [Doctrine](https://www.doctrine-project.org/) - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 62f931a76b4..687bd66056b 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,8 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables + - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. + - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` ### Usage examples @@ -204,6 +206,55 @@ end_time: 2022-08-30 09:21:46 1 row in set. Elapsed: 0.002 sec. ``` +Along with `system.backups` table, all backup and restore operations are also tracked in the system log table [backup_log](../operations/system-tables/backup_log.md): +``` +SELECT * +FROM system.backup_log +WHERE id = '7678b0b3-f519-4e6e-811f-5a0781a4eb52' +ORDER BY event_time_microseconds ASC +FORMAT Vertical +``` +```response +Row 1: +────── +event_date: 2023-08-18 +event_time_microseconds: 2023-08-18 11:13:43.097414 +id: 7678b0b3-f519-4e6e-811f-5a0781a4eb52 +name: Disk('backups', '1.zip') +status: CREATING_BACKUP +error: +start_time: 2023-08-18 11:13:43 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-18 +event_time_microseconds: 2023-08-18 11:13:43.174782 +id: 7678b0b3-f519-4e6e-811f-5a0781a4eb52 +name: Disk('backups', '1.zip') +status: BACKUP_FAILED +#highlight-next-line +error: Code: 598. DB::Exception: Backup Disk('backups', '1.zip') already exists. 
(BACKUP_ALREADY_EXISTS) (version 23.8.1.1) +start_time: 2023-08-18 11:13:43 +end_time: 2023-08-18 11:13:43 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +2 rows in set. Elapsed: 0.075 sec. +``` + ## Configuring BACKUP/RESTORE to use an S3 Endpoint To write backups to an S3 bucket you need three pieces of information: diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index d3e21cb2364..dfe62d591e3 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -65,6 +65,58 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. +## Encrypting and Hiding Configuration {#encryption} + +You can use symmetric encryption to encrypt a configuration element, for example, a plaintext password or private key. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt. + +Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process. + +Example: + +```xml + + + + + 00112233445566778899aabbccddeeff + + + + + admin + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + + + +``` + +To encrypt a value, you can use the (example) program `encrypt_decrypt`: + +Example: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + +Even with encrypted configuration elements, encrypted elements still appear in the preprocessed configuration file. If this is a problem for your ClickHouse deployment, we suggest two alternatives: either set file permissions of the preprocessed file to 600 or use the `hide_in_preprocessed` attribute. + +Example: + +```xml + + + + admin + secret + + + +``` + ## User Settings {#user-settings} The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`. @@ -104,12 +156,17 @@ Here you can see default config written in YAML: [config.yaml.example](https://g There are some differences between YAML and XML formats in terms of ClickHouse configurations. Here are some tips for writing a configuration in YAML format. 
-You should use a Scalar node to write a key-value pair: +An XML tag with a text value is represented by a YAML key-value pair ``` yaml key: value ``` -To create a node, containing other nodes you should use a Map: +Corresponding XML: +``` xml +value +``` + +A nested XML node is represented by a YAML map: ``` yaml map_key: key1: val1 @@ -117,7 +174,16 @@ map_key: key3: val3 ``` -To create a list of values or nodes assigned to one tag you should use a Sequence: +Corresponding XML: +``` xml + + val1 + val2 + val3 + +``` + +To create the same XML tag multiple times, use a YAML sequence: ``` yaml seq_key: - val1 @@ -128,8 +194,22 @@ seq_key: key3: val5 ``` -If you want to write an attribute for a Sequence or Map node, you should use a @ prefix before the attribute key. Note, that @ is reserved by YAML standard, so you should also to wrap it into double quotes: +Corresponding XML: +```xml +val1 +val2 + + val3 + + + + val4 + val5 + + +``` +To provide an XML attribute, you can use an attribute key with a `@` prefix. Note that `@` is reserved by YAML standard, so must be wrapped in double quotes: ``` yaml map: "@attr1": value1 @@ -137,16 +217,14 @@ map: key: 123 ``` -From that Map we will get these XML nodes: - +Corresponding XML: ``` xml 123 ``` -You can also set attributes for Sequence: - +It is also possible to use attributes in YAML sequence: ``` yaml seq: - "@attr1": value1 @@ -155,13 +233,25 @@ seq: - abc ``` -So, we can get YAML config equal to this XML one: - +Corresponding XML: ``` xml 123 abc ``` +The aforementioned syntax does not allow to express XML text nodes with XML attributes as YAML. This special case can be achieved using an +`#text` attribute key: +```yaml +map_key: + "@attr1": value1 + "#text": value2 +``` + +Corresponding XML: +```xml +value2 +``` + ## Implementation Details {#implementation-details} For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file. diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 02f52b6f8bf..6ed72152c1e 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -169,7 +169,6 @@ host = '127.0.0.1', port = 3306, database = 'test', connection_pool_size = 8, -on_duplicate_clause = 1, replace_query = 1 ``` @@ -185,7 +184,6 @@ replace_query = 1 3306 test 8 - 1 1 diff --git a/docs/en/operations/optimizing-performance/profile-guided-optimization.md b/docs/en/operations/optimizing-performance/profile-guided-optimization.md new file mode 100644 index 00000000000..cda21e3c604 --- /dev/null +++ b/docs/en/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1,26 @@ +--- +slug: /en/operations/optimizing-performance/profile-guided-optimization +sidebar_position: 54 +sidebar_label: Profile Guided Optimization (PGO) +--- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + +# Profile Guided Optimization + +Profile-Guided Optimization (PGO) is a compiler optimization technique where a program is optimized based on the runtime profile. + +According to the tests, PGO helps with achieving better performance for ClickHouse. 
According to the tests, we see improvements up to 15% in QPS on the ClickBench test suite. The more detailed results are available [here](https://pastebin.com/xbue3HMU). The performance benefits depend on your typical workload - you can get better or worse results. + +More information about PGO in ClickHouse you can read in the corresponding GitHub [issue](https://github.com/ClickHouse/ClickHouse/issues/44567). + +## How to build ClickHouse with PGO? + +There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse. + +1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`. +2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments. +3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step. + +A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization). + +If you are going to collect a sample workload directly from a production environment, we recommend trying to use Sampling PGO. diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index f5d0e5d6aed..9988bfc44bc 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -11,6 +11,8 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p Query profiler is automatically enabled in ClickHouse Cloud and you can run a sample query as follows +:::note If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster ::: + ``` sql SELECT count(), diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index bfa51650cd8..d0b785d8fda 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -61,9 +61,12 @@ use_query_cache = true`) but one should keep in mind that all `SELECT` queries i may return cached results then. The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table -`system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table -`system.events`. Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = true". Other queries do not -affect the cache miss counter. +`system.query_cache`. The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and +"QueryCacheMisses" in system table [system.events](system-tables/events.md). 
Both counters are only updated for `SELECT` queries which run +with setting `use_query_cache = true`, other queries do not affect "QueryCacheMisses". Field `query_log_usage` in system table +[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the +query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table +[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains. The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be changed (see below) but doing so is not recommended for security reasons. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a6ae517e401..7d0ab494926 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -217,23 +217,61 @@ Type: UInt32 Default: 1024 +## index_mark_cache_policy + +Index mark cache policy name. + +Type: String + +Default: SLRU + ## index_mark_cache_size Size of cache for index marks. Zero means disabled. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 +## index_mark_cache_size_ratio + +The size of the protected queue in the index mark cache relative to the cache's total size. + +Type: Double + +Default: 0.5 + +## index_uncompressed_cache_policy + +Index uncompressed cache policy name. + +Type: String + +Default: SLRU ## index_uncompressed_cache_size Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 +## index_uncompressed_cache_size_ratio + +The size of the protected queue in the index uncompressed cache relative to the cache's total size. + +Type: Double + +Default: 0.5 ## io_thread_pool_queue_size @@ -255,10 +293,22 @@ Default: SLRU Size of cache for marks (index of MergeTree family of tables). +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 5368709120 +## mark_cache_size_ratio + +The size of the protected queue in the mark cache relative to the cache's total size. + +Type: Double + +Default: 0.5 + ## max_backup_bandwidth_for_server The maximum read speed in bytes per second for all backups on server. Zero means unlimited. @@ -288,7 +338,7 @@ Default: 1000 Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -300,7 +350,7 @@ Default: 0 Limit on total number of concurrent insert queries. Zero means Unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. 
+This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -312,7 +362,7 @@ Default: 0 Limit on total number of concurrently select queries. Zero means Unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -456,6 +506,10 @@ Sets the cache size (in bytes) for mapped files. This setting allows avoiding fr Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage — because this memory can be discarded similar to the OS page cache. The cache is dropped (the files are closed) automatically on the removal of old parts in tables of the MergeTree family, also it can be dropped manually by the `SYSTEM DROP MMAP CACHE` query. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 1000 @@ -512,7 +566,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc cache local_disk /tiny_local_cache/ - 10M + 10M 1M 1 0 @@ -605,10 +659,22 @@ There is one shared cache for the server. Memory is allocated on demand. The cac The uncompressed cache is advantageous for very short queries in individual cases. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 +## uncompressed_cache_size_ratio + +The size of the protected queue in the uncompressed cache relative to the cache's total size. + +Type: Double + +Default: 0.5 + ## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval} The interval in seconds before reloading built-in dictionaries. @@ -1592,6 +1658,10 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera metric_log
7500 1000 + 1048576 + 8192 + 524288 + false
``` @@ -1636,7 +1706,7 @@ Keys for server/client settings: - verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. - verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile (default: true) – Wether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). -- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. +- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`) - Supported OpenSSL encryptions. - cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. - sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. - sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. @@ -1695,6 +1765,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1706,6 +1784,10 @@ Use the following parameters to configure logging: part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1773,6 +1855,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1786,6 +1876,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1831,6 +1925,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size_rows, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1844,6 +1946,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1861,6 +1967,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1874,6 +1988,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1890,6 +2008,14 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1901,13 +2027,16 @@ Parameters: system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day ``` - ## trace_log {#server_configuration_parameters-trace_log} Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. @@ -1920,6 +2049,12 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1931,6 +2066,10 @@ The default server configuration file `config.xml` contains the following settin trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1945,9 +2084,18 @@ Parameters: - `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) **Example** + ```xml @@ -1955,11 +2103,94 @@ Parameters: asynchronous_insert_log
7500 toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false
``` +## crash_log {#server_configuration_parameters-crash_log} + +Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation. + +Parameters: + +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional) +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). + +The default server configuration file `config.xml` contains the following settings section: + +``` xml + + system + crash_log
+ toYYYYMM(event_date) + 7500 + 1024 + 1024 + 512 + false +
+``` + +## backup_log {#server_configuration_parameters-backup_log} + +Settings for the [backup_log](../../operations/system-tables/backup_log.md) system table for logging `BACKUP` and `RESTORE` operations. + +Parameters: + +- `database` — Database name. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` is defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` is defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` is defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md#settings) that control the behavior of the MergeTree (optional). + +**Example** + +```xml + + + system + backup_log
+ 1000 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false + +
+
+``` + ## query_masking_rules {#query-masking-rules} Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, @@ -2164,6 +2395,8 @@ This section contains the following parameters: - `session_timeout_ms` — Maximum timeout for the client session in milliseconds. - `operation_timeout_ms` — Maximum timeout for one operation in milliseconds. - `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional. +- `fallback_session_lifetime.min` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the minimal duration of the fallback session. Set in seconds. Optional. Default is 3 hours. +- `fallback_session_lifetime.max` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours. - `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional. - zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection. * random - randomly selects one of ZooKeeper nodes. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 6863d7f3191..86d24c3a942 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -7,6 +7,10 @@ pagination_next: en/operations/settings/settings # Settings Overview +:::note +XML-based Settings Profiles and [configuration files](https://clickhouse.com/docs/en/operations/configuration-files) are currently not supported for ClickHouse Cloud. To specify settings for your ClickHouse Cloud service, you must use [SQL-driven Settings Profiles](https://clickhouse.com/docs/en/operations/access-rights#settings-profiles-management). +::: + There are two main groups of ClickHouse settings: - Global server settings diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 4122b4af40f..e746719b629 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -56,11 +56,11 @@ Possible values: - Any positive integer. -Default value: 300. +Default value: 3000. To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree). -You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300. +Prior to 23.6 this setting was set to 300. You can set a higher different value, it will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. 
Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300. ## parts_to_delay_insert {#parts-to-delay-insert} @@ -71,7 +71,7 @@ Possible values: - Any positive integer. -Default value: 150. +Default value: 1000. ClickHouse artificially executes `INSERT` longer (adds ‘sleep’) so that the background merge process can merge parts faster than they are added. @@ -623,6 +623,19 @@ Possible values: Default value: false +## number_of_free_entries_in_pool_to_execute_optimize_entire_partition {#number_of_free_entries_in_pool_to_execute_optimize_entire_partition} + +When there is less than specified number of free entries in pool, do not execute optimizing entire partition in the background (this task generated when set `min_age_to_force_merge_seconds` and enable `min_age_to_force_merge_on_partition_only`). This is to leave free threads for regular merges and avoid "Too many parts". + +Possible values: + +- Positive integer. + +Default value: 25 + +The value of the `number_of_free_entries_in_pool_to_execute_optimize_entire_partition` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception. + + ## allow_floating_point_partition_key {#allow_floating_point_partition_key} Enables to allow floating-point number as a partition key. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 163ed5d5826..14d612be831 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -298,7 +298,7 @@ Default value: `THROW`. - [JOIN clause](../../sql-reference/statements/select/join.md#select-join) - [Join table engine](../../engines/table-engines/special/join.md) -## max_partitions_per_insert_block {#max-partitions-per-insert-block} +## max_partitions_per_insert_block {#settings-max_partitions_per_insert_block} Limits the maximum number of partitions in a single inserted block. @@ -309,9 +309,18 @@ Default value: 100. **Details** -When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text: +When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse either logs a warning or throws an exception based on `throw_on_max_partitions_per_insert_block`. Exceptions have the following text: -> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). 
Partitions are intended for data manipulation (DROP PARTITION, etc).” +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} + +Allows you to control behaviour when `max_partitions_per_insert_block` is reached. + +- `true` - When an insert block reaches `max_partitions_per_insert_block`, an exception is raised. +- `false` - Logs a warning when `max_partitions_per_insert_block` is reached. + +Default value: `true` ## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user} @@ -327,3 +336,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all Zero means unlimited. Default value: 0. + +## max_sessions_for_user {#max-sessions-per-user} + +Maximum number of simultaneous sessions per authenticated user to the ClickHouse server. + +Example: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_user + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Default value: 0 (Infinite count of simultaneous sessions). diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index ee8e0d547b8..507f4dd23b9 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -321,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. +## output_format_schema {#output-format-schema} + +The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats. + ## output_format_enable_streaming {#output_format_enable_streaming} Enable streaming in output formats that support it. @@ -623,6 +627,30 @@ Column type should be String. If value is empty, default names `row_{i}`will be Default value: ''. +### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns} + +Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + +### output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters} + +When enabled, escape special characters in Markdown. + +[Common Mark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped by \: + +``` +! " # $ % & ' ( ) * + , - . / : ; < = > ? 
@ [ \ ] ^ _ ` { | } ~ +``` + +Possible values: + ++ 0 — Disable. ++ 1 — Enable. + +Default value: 0. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} @@ -760,6 +788,13 @@ When enabled, trailing empty lines at the end of TSV file will be skipped. Disabled by default. +### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns} + +Allow variable number of columns in rows in TSV input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## CSV format settings {#csv-format-settings} ### format_csv_delimiter {#format_csv_delimiter} @@ -951,9 +986,11 @@ Result ```text " string " ``` + ### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} -ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. +Allow variable number of columns in rows in CSV input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. Disabled by default. @@ -1108,17 +1145,6 @@ Default value: 1. ## Arrow format settings {#arrow-format-settings} -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} Ignore case when matching Arrow column names with ClickHouse column names. @@ -1164,21 +1190,10 @@ Enabled by default. Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) -Default value: `none`. +Default value: `lz4_frame`. ## ORC format settings {#orc-format-settings} -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} Batch size when reading ORC stripes. @@ -1217,17 +1232,6 @@ Default value: `none`. ## Parquet format settings {#parquet-format-settings} -### input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. 
- ### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} Ignore case when matching Parquet column names with ClickHouse column names. @@ -1252,6 +1256,12 @@ Allow skipping columns with unsupported types while schema inference for format Disabled by default. +### input_format_parquet_local_file_min_bytes_for_seek {#input_format_parquet_local_file_min_bytes_for_seek} + +min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. + +Default value - `8192`. + ### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} Use Parquet String type instead of Binary for String columns. @@ -1330,6 +1340,11 @@ When serializing Nullable columns with Google wrappers, serialize default values Disabled by default. +### format_protobuf_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) + ## Avro format settings {#avro-format-settings} ### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} @@ -1589,6 +1604,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format Disabled by default. +### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns} + +Allow variable number of columns in rows in CustomSeparated input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## Regexp format settings {#regexp-format-settings} ### format_regexp_escaping_rule {#format_regexp_escaping_rule} @@ -1626,6 +1648,11 @@ Possible values: Default value: `'by_values'`. +### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema) + ## MySQLDump format settings {#musqldump-format-settings} ### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name) diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 2f39a75453c..d08266b8ef8 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ Example: 8
- + 1000000000 100000000000 @@ -67,6 +67,8 @@ Example: 50 100 + 4 + 1 diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 580b51a984d..a1989ab3d13 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -98,6 +98,18 @@ Default value: 0. ``` +## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator} + +If true constant nondeterministic functions (e.g. function `now()`) are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. It helps to keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`. + +## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator} + +If true scalar subqueries are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. Default value: `false`. + +## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace} + +The maximum size of serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one the two settings above is enabled. Default value: 16384 (16 KiB). + ## distributed_product_mode {#distributed-product-mode} Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md). @@ -2371,6 +2383,23 @@ See also: - [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns) +## optimize_count_from_files {#optimize_count_from_files} + +Enables or disables the optimization of counting number of rows from files in different input formats. It applies to table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. + +Possible values: + +- 0 — Optimization disabled. +- 1 — Optimization enabled. + +Default value: `1`. + +## use_cache_for_count_from_files {#use_cache_for_count_from_files} + +Enables caching of rows number during count from files in table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. + +Enabled by default. + ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - Type: seconds @@ -3468,6 +3497,12 @@ Possible values: Default value: `0`. +## enable_url_encoding {#enable_url_encoding} + +Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. + +Enabled by default. + ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. @@ -4292,7 +4327,7 @@ Use this setting only for backward compatibility if your use cases depend on old ## session_timezone {#session_timezone} Sets the implicit time zone of the current session or query. -The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. +The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. The setting takes precedence over the globally configured (server-level) implicit time zone. A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). 
@@ -4327,7 +4362,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric ``` :::warning -Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. +Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. See the following example and explanation. ::: @@ -4572,3 +4607,63 @@ Type: Int64 Default: 0 +## rewrite_count_distinct_if_with_count_distinct_implementation + +Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#settings-count_distinct_implementation) setting. + +Possible values: + +- true — Allow. +- false — Disallow. + +Default value: `false`. + +## precise_float_parsing {#precise_float_parsing} + +Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms: +* If the value is `1`, then precise method is used. It is slower than fast method, but it always returns a number that is the closest machine representable number to the input. +* Otherwise, fast method is used (default). It usually returns the same value as precise, but in rare cases result may differ by one or two least significant digits. + +Possible values: `0`, `1`. + +Default value: `0`. + +Example: + +```sql +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7090999999999998 │ 15008753.000000002 │ +└─────────────────────┴──────────────────────────┘ + +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7091 │ 15008753 │ +└─────────────────────┴──────────────────────────┘ +``` + +## partial_result_update_duration_ms + +Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET. + +## max_rows_in_partial_result + +Maximum rows to show in the partial result after every real-time update while the query runs (use partial result limit + OFFSET as a value in case of OFFSET in the query). + +## validate_tcp_client_information {#validate-tcp-client-information} + +Determines whether validation of client information enabled when query packet is received from a client using a TCP connection. + +If `true`, an exception will be thrown on invalid client information from the TCP client. + +If `false`, the data will not be validated. The server will work with clients of all versions. + +The default value is `false`. + +**Example** + +``` xml +true +``` diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index fe6e8e15b0c..fc2933aa2cf 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -114,7 +114,11 @@ Example of disk configuration: ## Using local cache {#using-local-cache} -It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy. +It is possible to configure local cache over disks in storage configuration starting from version 22.3. +For versions 22.3 - 22.7 cache is supported only for `s3` disk type. 
For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. +For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS. +Cache uses `LRU` cache policy. + Example of configuration for versions later or equal to 22.8: diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index f357341da67..e46b495239c 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 └─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + + ## Metric descriptions @@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b Number of threads in the server of the PostgreSQL compatibility protocol. +### QueryCacheBytes + +Total size of the query cache cache in bytes. + +### QueryCacheEntries + +Total number of entries in the query cache. + ### ReplicasMaxAbsoluteDelay Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md new file mode 100644 index 00000000000..7e088fcad94 --- /dev/null +++ b/docs/en/operations/system-tables/backup_log.md @@ -0,0 +1,145 @@ +--- +slug: /en/operations/system-tables/backup_log +--- +# backup_log + +Contains logging entries with the information about `BACKUP` and `RESTORE` operations. + +Columns: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. +- `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Operation status. Possible values: + - `'CREATING_BACKUP'` + - `'BACKUP_CREATED'` + - `'BACKUP_FAILED'` + - `'RESTORING'` + - `'RESTORED'` + - `'RESTORE_FAILED'` +- `error` ([String](../../sql-reference/data-types/string.md)) — Error message of the failed operation (empty string for successful operations). +- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of the operation. +- `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time of the operation. +- `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files stored in the backup. +- `total_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total size of files stored in the backup. +- `num_entries` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. 
It is not the same as `num_files` if it's an incremental backup or if it contains empty files or duplicates. The following is always true: `num_entries <= num_files`. +- `uncompressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Uncompressed size of the backup. +- `compressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Compressed size of the backup. If the backup is not stored as an archive it equals to `uncompressed_size`. +- `files_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files read during the restore operation. +- `bytes_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total size of files read during the restore operation. + +**Example** + +```sql +BACKUP TABLE test_db.my_table TO Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status─────────┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ BACKUP_CREATED │ +└──────────────────────────────────────┴────────────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:05:21.998566 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: CREATING_BACKUP +error: +start_time: 2023-08-19 11:05:21 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:08:56.916192 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: BACKUP_CREATED +error: +start_time: 2023-08-19 11:05:21 +end_time: 2023-08-19 11:08:56 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 3525068304 +files_read: 0 +bytes_read: 0 +``` +```sql +RESTORE TABLE test_db.my_table FROM Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status───┐ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ RESTORED │ +└──────────────────────────────────────┴──────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:19.718077 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORING +error: +start_time: 2023-08-19 11:09:19 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:29.334234 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORED +error: +start_time: 2023-08-19 11:09:19 +end_time: 2023-08-19 11:09:29 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 4290362365 +files_read: 57 +bytes_read: 4290364870 +``` + +This is essentially the same information that is written in the system table `system.backups`: + +```sql +SELECT * FROM system.backups ORDER BY start_time +``` +```response 
+┌─id───────────────────────────────────┬─name──────────────────────────┬─status─────────┬─error─┬──────────start_time─┬────────────end_time─┬─num_files─┬─total_size─┬─num_entries─┬─uncompressed_size─┬─compressed_size─┬─files_read─┬─bytes_read─┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ Disk('backups_disk', '1.zip') │ BACKUP_CREATED │ │ 2023-08-19 11:05:21 │ 2023-08-19 11:08:56 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 3525068304 │ 0 │ 0 │ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ Disk('backups_disk', '1.zip') │ RESTORED │ │ 2023-08-19 11:09:19 │ 2023-08-19 11:09:29 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 4290362365 │ 57 │ 4290364870 │ +└──────────────────────────────────────┴───────────────────────────────┴────────────────┴───────┴─────────────────────┴─────────────────────┴───────────┴────────────┴─────────────┴───────────────────┴─────────────────┴────────────┴────────────┘ +``` + +**See Also** + +- [Backup and Restore](../../operations/backup.md) diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index deb9a0aaeb3..2659f80e338 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -23,6 +23,7 @@ Columns: - `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database). - `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database). - `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown". +- `name` ([String](../../sql-reference/data-types/string.md)) - An alias to cluster. **Example** diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 8cccf946621..a552fd548a8 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -8,17 +8,21 @@ Contains information about [distributed ddl queries (ON CLUSTER clause)](../../s Columns: - `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. -- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. -- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `entry_version` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) - Version of the entry +- `initiator_host` ([Nullable(String)](../../sql-reference/data-types/string.md)) - Host that initiated the DDL operation +- `initiator_port` ([Nullable(UInt16)](../../sql-reference/data-types/int-uint.md)) - Port used by the initiator - `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. - `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. -- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. -- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. 
+- `settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) - Settings used in the DDL operation +- `query_create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query created time. +- `host` ([String](../../sql-reference/data-types/string.md)) — Hostname +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code. +- `exception_text` ([Nullable(String)](../../sql-reference/data-types/string.md)) - Exception message - `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). -- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper). + **Example** @@ -34,32 +38,38 @@ Query id: f544e72a-6641-43f1-836b-24baa1c9632a Row 1: ────── entry: query-0000000000 -host_name: clickhouse01 -host_address: 172.23.0.11 -port: 9000 -status: Finished +entry_version: 5 +initiator_host: clickhouse01 +initiator_port: 9000 cluster: test_cluster query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster -initiator: clickhouse01:9000 -query_start_time: 2020-12-30 13:07:51 -query_finish_time: 2020-12-30 13:07:51 -query_duration_ms: 6 -exception_code: ZOK +settings: {'max_threads':'16','use_uncompressed_cache':'0'} +query_create_time: 2023-09-01 16:15:14 +host: clickhouse-01 +port: 9000 +status: Finished +exception_code: 0 +exception_text: +query_finish_time: 2023-09-01 16:15:14 +query_duration_ms: 154 Row 2: ────── -entry: query-0000000000 -host_name: clickhouse02 -host_address: 172.23.0.12 -port: 9000 -status: Finished +entry: query-0000000001 +entry_version: 5 +initiator_host: clickhouse01 +initiator_port: 9000 cluster: test_cluster query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster -initiator: clickhouse01:9000 -query_start_time: 2020-12-30 13:07:51 -query_finish_time: 2020-12-30 13:07:51 -query_duration_ms: 6 -exception_code: ZOK +settings: {'max_threads':'16','use_uncompressed_cache':'0'} +query_create_time: 2023-09-01 16:15:14 +host: clickhouse-01 +port: 9000 +status: Finished +exception_code: 630 +exception_text: Code: 630. DB::Exception: Cannot drop or rename test_db, because some tables depend on it: +query_finish_time: 2023-09-01 16:15:14 +query_duration_ms: 154 2 rows in set. Elapsed: 0.025 sec. ``` diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index ba5602ee292..d2b90a49b0d 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -10,6 +10,9 @@ Columns: - `event` ([String](../../sql-reference/data-types/string.md)) — Event name. - `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. - `description` ([String](../../sql-reference/data-types/string.md)) — Event description. +- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`. + +You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp). 
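+
+The alias can be used in place of `event`; a minimal illustrative query (the event name `Query` is just one example of a counter defined in that file):
+
+``` sql
+SELECT name, value, description FROM system.events WHERE name = 'Query'
+```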
**Example** diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 1b720098fc7..df42f80275e 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -23,7 +23,7 @@ System tables: Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. -Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. +Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md), [text_log](../../operations/system-tables/text_log.md) and [backup_log](../../operations/system-tables/backup_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements can be customized are: @@ -32,7 +32,7 @@ System log tables can be customized by creating a config file with the same name - `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression. - `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression. - `flush_interval_milliseconds`: interval of flushing data to disk. -- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option is contradict with `partition_by` and `ttl`. If set together, the server would raise an exception and exit. +- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option conflicts with `partition_by` and `ttl`. If set together, the server will raise an exception and exit. 
An example: @@ -47,6 +47,10 @@ An example: ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md index 35fd3a753b5..ee03441b9c1 100644 --- a/docs/en/operations/system-tables/information_schema.md +++ b/docs/en/operations/system-tables/information_schema.md @@ -3,12 +3,13 @@ slug: /en/operations/system-tables/information_schema --- # INFORMATION_SCHEMA -`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables. - -The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used. +`INFORMATION_SCHEMA` (or: `information_schema`) is a system database which provides a (somewhat) standardized, [DBMS-agnostic view](https://en.wikipedia.org/wiki/Information_schema) on metadata of database objects. The views in `INFORMATION_SCHEMA` are generally inferior to normal system tables but tools can use them to obtain basic information in a cross-DBMS manner. The structure and content of views in `INFORMATION_SCHEMA` is supposed to evolves in a backwards-compatible way, i.e. only new functionality is added but existing functionality is not changed or removed. In terms of internal implementation, views in `INFORMATION_SCHEMA` usually map to to normal system tables like [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md). ``` sql SHOW TABLES FROM INFORMATION_SCHEMA; + +-- or: +SHOW TABLES FROM information_schema; ``` ``` text @@ -17,6 +18,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA; │ SCHEMATA │ │ TABLES │ │ VIEWS │ +│ columns │ +│ schemata │ +│ tables │ +│ views │ └──────────┘ ``` @@ -27,6 +32,8 @@ SHOW TABLES FROM INFORMATION_SCHEMA; - [TABLES](#tables) - [VIEWS](#views) +Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases. + ## COLUMNS {#columns} Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but must be by the standard. diff --git a/docs/en/operations/system-tables/kafka_consumers.md b/docs/en/operations/system-tables/kafka_consumers.md new file mode 100644 index 00000000000..7e28a251e26 --- /dev/null +++ b/docs/en/operations/system-tables/kafka_consumers.md @@ -0,0 +1,58 @@ +--- +slug: /en/operations/system-tables/kafka_consumers +--- +# kafka_consumers + +Contains information about Kafka consumers. +Applicable for [Kafka table engine](../../engines/table-engines/integrations/kafka) (native ClickHouse integration) + +Columns: + +- `database` (String) - database of the table with Kafka Engine. 
+- `table` (String) - name of the table with Kafka Engine. +- `consumer_id` (String) - Kafka consumer identifier. Note, that a table can have many consumers. Specified by `kafka_num_consumers` parameter. +- `assignments.topic` (Array(String)) - Kafka topic. +- `assignments.partition_id` (Array(Int32)) - Kafka partition id. Note, that only one consumer can be assigned to a partition. +- `assignments.current_offset` (Array(Int64)) - current offset. +- `exceptions.time`, (Array(DateTime)) - timestamp when the 10 most recent exceptions were generated. +- `exceptions.text`, (Array(String)) - text of 10 most recent exceptions. +- `last_poll_time`, (DateTime) - timestamp of the most recent poll. +- `num_messages_read`, (UInt64) - number of messages read by the consumer. +- `last_commit_time`, (DateTime) - timestamp of the most recent poll. +- `num_commits`, (UInt64) - total number of commits for the consumer. +- `last_rebalance_time`, (DateTime) - timestamp of the most recent Kafka rebalance +- `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions +- `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster +- `is_currently_used`, (UInt8) - consumer is in use +- `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 disable, default is 3000 (once in three seconds). + +Example: + +``` sql +SELECT * +FROM system.kafka_consumers +FORMAT Vertical +``` + +``` text +Row 1: +────── +database: test +table: kafka +consumer_id: ClickHouse-instance-test-kafka-1caddc7f-f917-4bb1-ac55-e28bd103a4a0 +assignments.topic: ['system_kafka_cons'] +assignments.partition_id: [0] +assignments.current_offset: [18446744073709550615] +exceptions.time: [] +exceptions.text: [] +last_poll_time: 2006-11-09 18:47:47 +num_messages_read: 4 +last_commit_time: 2006-11-10 04:39:40 +num_commits: 1 +last_rebalance_time: 1970-01-01 00:00:00 +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 1 +rdkafka_stat: {...} + +``` diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index 0f09d559d8b..c436c0ade3b 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -1,7 +1,7 @@ --- slug: /en/operations/system-tables/licenses --- -# licenses +# licenses Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources. 
@@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` text ┌─library_name───────┬─license_type─┬─license_path────────────────────────┐ -│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │ -│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │ -│ avro │ Apache │ /contrib/avro/LICENSE.txt │ │ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │ -│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │ -│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │ -│ aws │ Apache │ /contrib/aws/LICENSE.txt │ -│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │ -│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │ +│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │ │ brotli │ MIT │ /contrib/brotli/LICENSE │ -│ capnproto │ MIT │ /contrib/capnproto/LICENSE │ -│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │ -│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │ -│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │ -│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │ +│ [...] │ [...] │ [...] │ └────────────────────┴──────────────┴─────────────────────────────────────┘ ``` diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 5a7dfd03eb4..ae0e7620d35 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -10,8 +10,9 @@ Columns: - `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. - `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. +- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`. -The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. +You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp). **Example** diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index b9fdd19c643..ced97166702 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -48,7 +48,7 @@ Columns: - `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. - `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. 
For other queries, the column value is 0. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0. - `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. - `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. - `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. @@ -101,7 +101,8 @@ Columns: - `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) - `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. - `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. -- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. +- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously. +- `peak_threads_usage` ([UInt64)](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query. - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution. - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution. - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution. @@ -111,6 +112,11 @@ Columns: - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: + - `'Unknown'` = Status unknown. + - `'None'` = The query result was neither written into nor read from the query cache. + - `'Write'` = The query result was written into the query cache. + - `'Read'` = The query result was read from the query cache. 
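+
+A quick way to inspect the new column, shown here only as an illustrative sketch:
+
+``` sql
+SELECT event_time, query, query_cache_usage
+FROM system.query_log
+WHERE type = 'QueryFinish'
+ORDER BY event_time DESC
+LIMIT 5
+```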
**Example** @@ -186,6 +192,7 @@ used_formats: [] used_functions: [] used_storages: [] used_table_functions: [] +query_cache_usage: None ``` **See Also** diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md new file mode 100644 index 00000000000..168bfe708c4 --- /dev/null +++ b/docs/en/operations/system-tables/scheduler.md @@ -0,0 +1,64 @@ +--- +slug: /en/operations/system-tables/scheduler +--- +# scheduler + +Contains information and status for [scheduling nodes](/docs/en/operations/workload-scheduling.md/#hierarchy) residing on the local server. +This table can be used for monitoring. The table contains a row for every scheduling node. + +Example: + +``` sql +SELECT * +FROM system.scheduler +WHERE resource = 'network_read' AND path = '/prio/fair/prod' +FORMAT Vertical +``` + +``` text +Row 1: +────── +resource: network_read +path: /prio/fair/prod +type: fifo +weight: 5 +priority: 0 +is_active: 0 +active_children: 0 +dequeued_requests: 67 +dequeued_cost: 4692272 +busy_periods: 63 +vruntime: 938454.1999999989 +system_vruntime: ᴺᵁᴸᴸ +queue_length: 0 +queue_cost: 0 +budget: -60524 +is_satisfied: ᴺᵁᴸᴸ +inflight_requests: ᴺᵁᴸᴸ +inflight_cost: ᴺᵁᴸᴸ +max_requests: ᴺᵁᴸᴸ +max_cost: ᴺᵁᴸᴸ +``` + +Columns: + +- `resource` (`String`) - Resource name +- `path` (`String`) - Path to a scheduling node within this resource scheduling hierarchy +- `type` (`String`) - Type of a scheduling node. +- `weight` (`Float64`) - Weight of a node, used by a parent node of `fair`` type. +- `priority` (`Int64`) - Priority of a node, used by a parent node of 'priority' type (Lower value means higher priority). +- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. +- `active_children` (`UInt64`) - The number of children in active state. +- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. +- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node. +- `busy_periods` (`UInt64`) - The total number of deactivations of this node. +- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. +- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. +- `queue_length` (`Nullable(UInt64)`) - For `fifo` nodes only. Current number of resource requests residing in the queue. +- `queue_cost` (`Nullable(UInt64)`) - For `fifo` nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue. +- `budget` (`Nullable(Int64)`) - For `fifo` nodes only. The number of available "cost units" for new resource requests. Can appear in case of discrepancy of estimated and real costs of resource requests (e.g. after read/write failure) +- `is_satisfied` (`Nullable(UInt8)`) - For constraint nodes only (e.g. `inflight_limit`). Equals `1` if all the constraint of this node are satisfied. +- `inflight_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state. +- `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. 
bytes) of all resource requests dequeued from this node, that are currently in consumption state. +- `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation. +- `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation. diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index a9b82404b90..473c1c628d1 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -43,7 +43,7 @@ Parameters: - `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. - `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. - `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. -- `task-upload-force` — Force upload `task-file` even if node already exists. +- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false. - `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. ## Format of keeper.xml {#format-of-zookeeper-xml} diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md new file mode 100644 index 00000000000..76db9e41836 --- /dev/null +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -0,0 +1,38 @@ +--- +slug: /en/operations/utilities/clickhouse-disks +sidebar_position: 59 +sidebar_label: clickhouse-disks +--- + +# clickhouse-disks + +A utility providing filesystem-like operations for ClickHouse disks. + +Program-wide options: + +* `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`. +* `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`. +* `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. +* `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. + +## Commands + +* `copy [--disk-from d1] [--disk-to d2] `. + Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided) + to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided). +* `move `. + Move file or directory from `FROM_PATH` to `TO_PATH`. +* `remove `. + Remove `PATH` recursively. +* `link `. + Create a hardlink from `FROM_PATH` to `TO_PATH`. +* `list [--recursive] ...` + List files at `PATH`s. Non-recursive by default. +* `list-disks`. + List disks names. +* `mkdir [--recursive] `. + Create a directory. Non-recursive by default. +* `read: []` + Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied). +* `write [FROM_PATH] `. + Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`. 
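+
+A minimal, illustrative session (the disk name `backups` and the paths are assumptions that depend on your server configuration; depending on the installation, the tool may also be invoked as `clickhouse disks`):
+
+```bash
+clickhouse-disks --config-file /etc/clickhouse-server/config.xml list-disks
+clickhouse-disks --disk backups list --recursive store
+clickhouse-disks copy --disk-from default --disk-to backups data/part data/part
+```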
diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 77f816fe428..42fe4fd196e 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -11,18 +11,20 @@ A client application to interact with clickhouse-keeper by its native protocol. - `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode. - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. -- `-p N`, `--port=N` — Server port. Default value: 2181 +- `-p N`, `--port=N` — Server port. Default value: 9181 - `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s. - `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s. - `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s. - `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`. +- `--log-level=LEVEL` — Set log level. Default value: `information`. +- `--no-confirmation` — If set, will not require a confirmation on several commands. Default value `false` for interactive and `true` for query - `--help` — Shows the help message. ## Example {#clickhouse-keeper-client-example} ```bash -./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 -Connected to ZooKeeper at [::1]:2181 with session_id 137 +./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 +Connected to ZooKeeper at [::1]:9181 with session_id 137 / :) ls keeper foo bar / :) cd keeper @@ -44,10 +46,18 @@ keeper foo bar - `ls [path]` -- Lists the nodes for the given path (default: cwd) - `cd [path]` -- Change the working path (default `.`) +- `exists ` -- Returns `1` if node exists, `0` otherwise - `set [version]` -- Updates the node's value. Only update if version matches (default: -1) -- `create ` -- Creates new node +- `create [mode]` -- Creates new node with the set value +- `touch ` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists - `get ` -- Returns the node's value - `remove ` -- Remove the node - `rmr ` -- Recursively deletes path. Confirmation required - `flwc ` -- Executes four-letter-word command - `help` -- Prints this message +- `get_stat [path]` -- Returns the node's stat (default `.`) +- `find_super_nodes [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`) +- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive +- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10) +- `sync ` -- Synchronizes node between processes and leader +- `reconfig "" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 0443a80cf17..c863282efc1 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -30,11 +30,17 @@ curl https://clickhouse.com/ | sh The binary you just downloaded can run all sorts of ClickHouse tools and utilities. 
If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx). ::: -## Query data in a CSV file using SQL +## Query data in a file using SQL {#query_data_in_file} A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL. -If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews: +If the file is sitting on the same machine as `clickhouse-local`, you can simple specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews.tsv'" +``` + +This command is a shortcut of: ```bash ./clickhouse local -q "SELECT * FROM file('reviews.tsv')" @@ -51,6 +57,19 @@ The `file` table function creates a table, and you can use `DESCRIBE` to see the ./clickhouse local -q "DESCRIBE file('reviews.tsv')" ``` +:::tip +You are allowed to use globs in file name (See [glob substitutions](/docs/en/sql-reference/table-functions/file.md/#globs-in-path)). + +Examples: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'" +./clickhouse local -q "SELECT * FROM 'review_?.csv'" +./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'" +``` + +::: + ```response marketplace Nullable(String) customer_id Nullable(Int64) @@ -183,8 +202,8 @@ Arguments: - `-S`, `--structure` — table structure for input data. - `--input-format` — input format, `TSV` by default. - `-f`, `--file` — path to data, `stdin` by default. -- `-q`, `--query` — queries to execute with `;` as delimiter. Cannot be used simultaneously with `--queries-file`. -- `--queries-file` - file path with queries to execute. Cannot be used simultaneously with `--query`. +- `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`. +- `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`. - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. - `-N`, `--table` — table name where to put output data, `table` by default. - `--format`, `--output-format` — output format, `TSV` by default. diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index 112a51cfa97..5667f99b6fa 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -13,4 +13,6 @@ pagination_next: 'en/operations/utilities/clickhouse-copier' - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. - [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — Compresses and decompresses data. +- [clickhouse-disks](../../operations/utilities/clickhouse-disks.md) -- Provides filesystem-like operations + on files among different ClickHouse disks. 
- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for ODBC driver. diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md new file mode 100644 index 00000000000..6ed6ced41b4 --- /dev/null +++ b/docs/en/operations/workload-scheduling.md @@ -0,0 +1,153 @@ +--- +slug: /en/operations/workload-scheduling +sidebar_position: 69 +sidebar_label: "Workload scheduling" +title: "Workload scheduling" +--- + +When ClickHouse execute multiple queries simultaneously, they may be using shared resources (e.g. disks). Scheduling constraints and policies can be applied to regulate how resources are utilized and shared between different workloads. For every resource a scheduling hierarchy can be configured. Hierarchy root represents a resource, while leafs are queues, holding requests that exceed resource capacity. + +:::note +Currently only remote disk IO can be scheduled using described method. For CPU scheduling see settings about thread pools and [`concurrent_threads_soft_limit_num`](server-configuration-parameters/settings.md#concurrent_threads_soft_limit_num). For flexible memory limits see [Memory overcommit](settings/memory-overcommit.md) +::: + +## Disk configuration {#disk-config} + +To enable IO scheduling for a specific disk, you have to specify `read_resource` and/or `write_resource` in storage configuration. It says ClickHouse what resource should be used for every read and write requests with given disk. Read and write resource can refer to the same resource name, which is useful for local SSDs or HDDs. Multiple different disks also can refer to the same resource, which is useful for remote disks: if you want to be able to allow fair division of network bandwidth between e.g. "production" and "development" workloads. + +Example: +```xml + + + ... + + + s3 + https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ + your_access_key_id + your_secret_access_key + network_read + network_write + + + + + +
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3_main>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+``` + +## Workload markup {#workload_markup} + +Queries can be marked with setting `workload` to distinguish different workloads. If `workload` is not set, than value "default" is used. Note that you are able to specify the other value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from the user to be marked with fixed value of `workload` setting. + +Let's consider an example of a system with two different workloads: "production" and "development". + +```sql +SELECT count() FROM my_table WHERE value = 42 SETTINGS workload = 'production' +SELECT count() FROM my_table WHERE value = 13 SETTINGS workload = 'development' +``` + +## Resource scheduling hierarchy {#hierarchy} + +From the standpoint of scheduling subsystem a resource represents a hierarchy of scheduling nodes. + +```mermaid +graph TD + subgraph network_read + nr_root(("/")) + -->|100 concurrent requests| nr_fair("fair") + -->|75% bandwidth| nr_prod["prod"] + nr_fair + -->|25% bandwidth| nr_dev["dev"] + end + + subgraph network_write + nw_root(("/")) + -->|100 concurrent requests| nw_fair("fair") + -->|75% bandwidth| nw_prod["prod"] + nw_fair + -->|25% bandwidth| nw_dev["dev"] + end +``` + +**Possible node types:** +* `inflight_limit` (constraint) - blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child. +* `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1). +* `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0). +* `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity. + +The following example shows how to define IO scheduling hierarchies shown in the picture: + +```xml + + + + + inflight_limit + 100 + + + fair + + + fifo + 3 + + + fifo + + + + + inflight_limit + 100 + + + fair + + + fifo + 3 + + + fifo + + + + +``` + +## Workload classifiers {#workload_classifiers} + +Workload classifiers are used to define mapping from `workload` specified by a query into leaf-queues that should be used for specific resources. At the moment, workload classification is simple: only static mapping is available. + +Example: +```xml + + + + /fair/prod + /fair/prod + + + /fair/dev + /fair/dev + + + /fair/dev + /fair/dev + + + +``` + + +## See also + - [system.scheduler](/docs/en/operations/system-tables/scheduler.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index db19f524b31..f79fe66c05d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function ins In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY. When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. 
To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. + +- Alias: `any_value` diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md index 7ad7e37e5c2..0ddbf700bd3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md @@ -7,6 +7,10 @@ sidebar_position: 30 The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md). -:::note +Alias: +- `STD` +- `STDDEV_POP` + +:::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error. ::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md index 068725c4991..01484c2b02e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -7,6 +7,8 @@ sidebar_position: 31 The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md). -:::note +Alias: `STDDEV_SAMP`. + +:::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error. ::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 0a665c83e74..751688b0830 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -9,6 +9,8 @@ Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x In other words, dispersion for a set of values. Returns `Float64`. -:::note +Alias: `VAR_POP`. + +:::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. ::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index 76639d2d7a0..9b2b94936ec 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -11,6 +11,8 @@ It represents an unbiased estimate of the variance of a random variable if passe Returns `Float64`. When `n <= 1`, returns `+∞`. -:::note +Alias: `VAR_SAMP`. + +:::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. 
::: diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index fe279edb709..c99c8791542 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -140,8 +140,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index bba5ea74ebe..e082eb29fbd 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -4,15 +4,17 @@ sidebar_position: 42 sidebar_label: Decimal --- -# Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) +# Decimal, Decimal(P), Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) Signed fixed-point numbers that keep precision during add, subtract and multiply operations. For division least significant digits are discarded (not rounded). ## Parameters -- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). +- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). By default the precision is 10. - S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have. +Decimal(P) is equivalent to Decimal(P, 0). Similarly, the syntax Decimal is equivalent to Decimal(10, 0). + Depending on P parameter value Decimal(P, S) is a synonym for: - P from \[ 1 : 9 \] - for Decimal32(S) - P from \[ 10 : 18 \] - for Decimal64(S) diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 3b2787008d2..1d37b829dd5 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -26,9 +26,9 @@ SELECT p, toTypeName(p) FROM geo_point; Result: ``` text -┌─p─────┬─toTypeName(p)─┐ +┌─p───────┬─toTypeName(p)─┐ │ (10,10) │ Point │ -└───────┴───────────────┘ +└─────────┴───────────────┘ ``` ## Ring diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index dfe0eda2e21..3add754e6b6 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) 
--- -# Tuple(t1, T2, …) +# Tuple(T1, T2, …) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element. diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index b0f19f0d8be..40f756b9588 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -6,42 +6,42 @@ sidebar_label: UUID # UUID -A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). +A Universally Unique Identifier (UUID) is a 16-byte value used to identify records. For detailed information about UUIDs, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). -The example of UUID type value is represented below: +While different UUID variants exist (see [here](https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis)), ClickHouse does not validate that inserted UUIDs conform to a particular variant. UUIDs are internally treated as a sequence of 16 random bytes with [8-4-4-4-12 representation](https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation) at SQL level. + +Example UUID value: ``` text 61f0c404-5cb3-11e7-907b-a6006ad3dba0 ``` -If you do not specify the UUID column value when inserting a new record, the UUID value is filled with zero: +The default UUID is all-zero. It is used, for example, when a new record is inserted but no value for a UUID column is specified: ``` text 00000000-0000-0000-0000-000000000000 ``` -## How to Generate +## Generating UUIDs -To generate the UUID value, ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function. +ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function to generate random UUID version 4 values. ## Usage Example **Example 1** -This example demonstrates creating a table with the UUID type column and inserting a value into the table. +This example demonstrates the creation of a table with a UUID column and the insertion of a value into the table. ``` sql CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog -``` -``` sql INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1' -``` -``` sql SELECT * FROM t_uuid ``` +Result: + ``` text ┌────────────────────────────────────x─┬─y─────────┐ │ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ @@ -50,13 +50,11 @@ SELECT * FROM t_uuid **Example 2** -In this example, the UUID column value is not specified when inserting a new record. +In this example, no UUID column value is specified when the record is inserted, i.e. 
the default UUID value is inserted: ``` sql INSERT INTO t_uuid (y) VALUES ('Example 2') -``` -``` sql SELECT * FROM t_uuid ``` diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 6c3d80683db..80f728a33df 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1092,7 +1092,7 @@ Types of sources (`source_type`): - [Local file](#local_file) - [Executable File](#executable) - [Executable Pool](#executable_pool) -- [HTTP(s)](#http) +- [HTTP(S)](#http) - DBMS - [ODBC](#odbc) - [MySQL](#mysql) @@ -1102,7 +1102,7 @@ Types of sources (`source_type`): - [Cassandra](#cassandra) - [PostgreSQL](#postgresql) -## Local File {#local_file} +### Local File {#local_file} Example of settings: @@ -1132,7 +1132,7 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION - [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) -## Executable File {#executable} +### Executable File {#executable} Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. @@ -1161,7 +1161,7 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. -## Executable Pool {#executable_pool} +### Executable Pool {#executable_pool} Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts. @@ -1196,9 +1196,9 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. -## Http(s) {#https} +### HTTP(S) {#https} -Working with an HTTP(s) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Example of settings: @@ -1248,7 +1248,55 @@ Setting fields: When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server. -### Known Vulnerability of the ODBC Dictionary Functionality +### DBMS + +#### ODBC + +You can use this method to connect any database that has an ODBC driver. + +Example of settings: + +``` xml + + + DatabaseName + ShemaName.TableName
+        <connection_string>DSN=some_parameters</connection_string>
+        <invalidate_query>SQL_QUERY</invalidate_query>
+        <query>SELECT id, value_1, value_2 FROM ShemaName.TableName</query>
+    </odbc>
+ +``` + +or + +``` sql +SOURCE(ODBC( + db 'DatabaseName' + table 'SchemaName.TableName' + connection_string 'DSN=some_parameters' + invalidate_query 'SQL_QUERY' + query 'SELECT id, value_1, value_2 FROM db_name.table_name' +)) +``` + +Setting fields: + +- `db` – Name of the database. Omit it if the database name is set in the `` parameters. +- `table` – Name of the table and schema if exists. +- `connection_string` – Connection string. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `query` – The custom query. Optional parameter. + +:::note +The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. +::: + +ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. + +If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. + +##### Known Vulnerability of the ODBC Dictionary Functionality :::note When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. @@ -1277,7 +1325,7 @@ SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`. -### Example of Connecting Postgresql +##### Example of Connecting Postgresql Ubuntu OS. @@ -1358,7 +1406,7 @@ LIFETIME(MIN 300 MAX 360) You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`. -### Example of Connecting MS SQL Server +##### Example of Connecting MS SQL Server Ubuntu OS. @@ -1462,55 +1510,7 @@ LAYOUT(FLAT()) LIFETIME(MIN 300 MAX 360) ``` -## DBMS - -### ODBC - -You can use this method to connect any database that has an ODBC driver. - -Example of settings: - -``` xml - - - DatabaseName - ShemaName.TableName
-        <connection_string>DSN=some_parameters</connection_string>
-        <invalidate_query>SQL_QUERY</invalidate_query>
-        <query>SELECT id, value_1, value_2 FROM ShemaName.TableName</query>
-    </odbc>
- -``` - -or - -``` sql -SOURCE(ODBC( - db 'DatabaseName' - table 'SchemaName.TableName' - connection_string 'DSN=some_parameters' - invalidate_query 'SQL_QUERY' - query 'SELECT id, value_1, value_2 FROM db_name.table_name' -)) -``` - -Setting fields: - -- `db` – Name of the database. Omit it if the database name is set in the `` parameters. -- `table` – Name of the table and schema if exists. -- `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `query` – The custom query. Optional parameter. - -:::note -The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. -::: - -ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. - -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. - -### Mysql +#### Mysql Example of settings: @@ -1627,7 +1627,7 @@ SOURCE(MYSQL( )) ``` -### ClickHouse +#### ClickHouse Example of settings: @@ -1680,7 +1680,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -### Mongodb +#### Mongodb Example of settings: @@ -1723,7 +1723,7 @@ Setting fields: - `options` - MongoDB connection string options (optional parameter). -### Redis +#### Redis Example of settings: @@ -1756,7 +1756,7 @@ Setting fields: - `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. - `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. -### Cassandra +#### Cassandra Example of settings: @@ -1798,7 +1798,7 @@ Setting fields: The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. ::: -### PostgreSQL +#### PostgreSQL Example of settings: @@ -1855,7 +1855,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -## Null +### Null A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can useful for tests or with setups with separated data and query nodes at nodes with Distributed tables. @@ -2361,6 +2361,12 @@ Result: └────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────┘ ``` +#### Matching Modes + +Pattern matching behavior can be modified with certain dictionary settings: +- `regexp_dict_flag_case_insensitive`: Use case-insensitive matching (defaults to `false`). Can be overridden in individual expressions with `(?i)` and `(?-i)`. +- `regexp_dict_flag_dotall`: Allow '.' to match newline characters (defaults to `false`). + ### Use Regular Expression Tree Dictionary in ClickHouse Cloud Above used `YAMLRegExpTree` source works in ClickHouse Open Source but not in ClickHouse Cloud. 
To use regexp tree dictionaries in ClickHouse could, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause. @@ -2476,52 +2482,3 @@ Dictionary updates (other than loading at first use) do not block queries. Durin We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. - -## Embedded Dictionaries - - - -ClickHouse contains a built-in feature for working with a geobase. - -This allows you to: - -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. -- Get a chain of parent regions. - -All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. - -The internal dictionaries are disabled in the default package. -To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file. - -The geobase is loaded from text files. - -Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory. - -Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory. - -You can also create these files yourself. The file format is as follows: - -`regions_hierarchy*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column - -`regions_names_*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. - -A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. - -Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated. -For updates, the file modification times are checked. If a file has changed, the dictionary is updated. -The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter. -Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. - -We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. - -There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. 
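As a rough sketch of the dump step described above for regexp tree dictionaries (the dictionary name `regexp_dict` and the output file name are hypothetical):

```sql
-- Dump a locally created regexp tree dictionary into a CSV file
-- so it can later be loaded in ClickHouse Cloud.
SELECT *
FROM dictionary('regexp_dict')
INTO OUTFILE 'regexp_dict.csv'
FORMAT CSV;
```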
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 44d385312d0..dae8ff6d396 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -183,9 +183,8 @@ arrayConcat(arrays) **Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. - **Example** - +**Example** ``` sql SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res @@ -1795,6 +1794,330 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md). └─────┴──────────────────────────────────────────────────────────────────────────────────────────┘ ``` +## arrayRotateLeft + +Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +If the number of elements is negative, the array is rotated to the right. + +**Syntax** + +``` sql +arrayRotateLeft(arr, n) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to rotate. + +**Returned value** + +- An array rotated to the left by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res; +``` + +Result: + +``` text +┌─res───────────────────┐ +│ ['d','e','a','b','c'] │ +└───────────────────────┘ +``` + +## arrayRotateRight + +Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +If the number of elements is negative, the array is rotated to the left. + +**Syntax** + +``` sql +arrayRotateRight(arr, n) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to rotate. + +**Returned value** + +- An array rotated to the right by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res; +``` + +Result: + +``` text +┌─res───────────────────┐ +│ ['c','d','e','a','b'] │ +└───────────────────────┘ +``` + +## arrayShiftLeft + +Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +New elements are filled with the provided argument or the default value of the array element type. +If the number of elements is negative, the array is shifted to the right. + +**Syntax** + +``` sql +arrayShiftLeft(arr, n[, default]) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to shift. +- `default` — Optional. Default value for new elements. + +**Returned value** + +- An array shifted to the left by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). 
+ +**Examples** + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res; +``` + +Result: + +``` text +┌─res─────────────┐ +│ [3,4,5,6,42,42] │ +└─────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Result: + +``` text +┌─res─────────────────────────────┐ +│ ['d','e','f','foo','foo','foo'] │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Result: + +``` text +┌─res─────────────────┐ +│ [3,4,5,6,4242,4242] │ +└─────────────────────┘ +``` + +## arrayShiftRight + +Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +New elements are filled with the provided argument or the default value of the array element type. +If the number of elements is negative, the array is shifted to the left. + +**Syntax** + +``` sql +arrayShiftRight(arr, n[, default]) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to shift. +- `default` — Optional. Default value for new elements. + +**Returned value** + +- An array shifted to the right by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res; +``` + +Result: + +``` text +┌─res─────────────┐ +│ [42,42,1,2,3,4] │ +└─────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Result: + +``` text +┌─res─────────────────────────────┐ +│ ['foo','foo','foo','a','b','c'] │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Result: + +``` text +┌─res─────────────────┐ +│ [4242,4242,1,2,3,4] │ +└─────────────────────┘ +``` + ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 87d84425029..0a7be3142ee 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -237,6 +237,11 @@ type_samoa: DateTime('US/Samoa') int32samoa: 1546300800 ``` +**See Also** + +- [formatDateTime](#date_time_functions-formatDateTime) - supports non-constant timezone. +- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone. + ## timeZoneOf Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. 
@@ -720,6 +725,42 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┴───────────────┘ ``` +## toDaysSinceYearZero + +Returns for a given date, the number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`TO_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_to-days) function. + +**Syntax** + +``` sql +toDaysSinceYearZero(date) +``` + +Aliases: `TO_DAYS` + +**Arguments** + +- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md) or [Date32](../../sql-reference/data-types/date32.md). + +**Returned value** + +The number of days passed since date 0000-01-01. + +Type: [UInt32](../../sql-reference/data-types/int-uint.md). + +**Example** + +``` sql +SELECT toDaysSinceYearZero(toDate('2023-09-08')); +``` + +Result: + +``` text +┌─toDaysSinceYearZero(toDate('2023-09-08')))─┐ +│ 713569 │ +└────────────────────────────────────────────┘ +``` + ## age Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond. @@ -738,16 +779,16 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - - `microsecond` (possible abbreviations: `us`, `u`) - - `millisecond` (possible abbreviations: `ms`) - - `second` (possible abbreviations: `ss`, `s`) - - `minute` (possible abbreviations: `mi`, `n`) - - `hour` (possible abbreviations: `hh`, `h`) - - `day` (possible abbreviations: `dd`, `d`) - - `week` (possible abbreviations: `wk`, `ww`) - - `month` (possible abbreviations: `mm`, `m`) - - `quarter` (possible abbreviations: `qq`, `q`) - - `year` (possible abbreviations: `yyyy`, `yy`) + - `microsecond` `microseconds` `us` `u` + - `millisecond` `milliseconds` `ms` + - `second` `seconds` `ss` `s` + - `minute` `minutes` `mi` `n` + - `hour` `hours` `hh` `h` + - `day` `days` `dd` `d` + - `week` `weeks` `wk` `ww` + - `month` `months` `mm` `m` + - `quarter` `quarters` `qq` `q` + - `year` `years` `yyyy` `yy` - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -815,16 +856,16 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). 
Possible values: - - `microsecond` (possible abbreviations: `us`, `u`) - - `millisecond` (possible abbreviations: `ms`) - - `second` (possible abbreviations: `ss`, `s`) - - `minute` (possible abbreviations: `mi`, `n`) - - `hour` (possible abbreviations: `hh`, `h`) - - `day` (possible abbreviations: `dd`, `d`) - - `week` (possible abbreviations: `wk`, `ww`) - - `month` (possible abbreviations: `mm`, `m`) - - `quarter` (possible abbreviations: `qq`, `q`) - - `year` (possible abbreviations: `yyyy`, `yy`) + - `microsecond` `microseconds` `us` `u` + - `millisecond` `milliseconds` `ms` + - `second` `seconds` `ss` `s` + - `minute` `minutes` `mi` `n` + - `hour` `hours` `hh` `h` + - `day` `days` `dd` `d` + - `week` `weeks` `wk` `ww` + - `month` `months` `mm` `m` + - `quarter` `quarters` `qq` `q` + - `year` `years` `yyyy` `yy` - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -1494,6 +1535,33 @@ Result: └─────────────────────────────────────────────────────────────────────┘ ``` +Additionally, the `formatDateTime` function can take a third String argument containing the name of the time zone. Example: `Asia/Istanbul`. In this case, the time is formatted according to the specified time zone. + +**Example** + +```sql +SELECT + now() AS ts, + time_zone, + formatDateTime(ts, '%T', time_zone) AS str_tz_time +FROM system.time_zones +WHERE time_zone LIKE 'Europe%' +LIMIT 10 + +┌──────────────────ts─┬─time_zone─────────┬─str_tz_time─┐ +│ 2023-09-08 19:13:40 │ Europe/Amsterdam │ 21:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Andorra │ 21:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Astrakhan │ 23:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Athens │ 22:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Belfast │ 20:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Belgrade │ 21:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Berlin │ 21:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Bratislava │ 21:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Brussels │ 21:13:40 │ +│ 2023-09-08 19:13:40 │ Europe/Bucharest │ 22:13:40 │ +└─────────────────────┴───────────────────┴─────────────┘ +``` + **See Also** - [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) @@ -1819,6 +1887,72 @@ Result: └────────────────────────────────────┘ ``` +## toUTCTimestamp + +Convert DateTime/DateTime64 type value from other time zone to UTC timezone timestamp + +**Syntax** + +``` sql +toUTCTimestamp(time_val, time_zone) +``` + +**Arguments** + +- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) + +**Returned value** + +- DateTime/DateTime64 in text form + +**Example** + +``` sql +SELECT toUTCTimestamp(toDateTime('2023-03-16'), 'Asia/Shanghai'); +``` + +Result: + +``` text +┌─toUTCTimestamp(toDateTime('2023-03-16'),'Asia/Shanghai')┐ +│ 2023-03-15 16:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` + +## fromUTCTimestamp + +Convert DateTime/DateTime64 type value from UTC timezone to other time zone timestamp + +**Syntax** + +``` sql +fromUTCTimestamp(time_val, time_zone) +``` + +**Arguments** + +- `time_val` — A DateTime/DateTime64 type const value or a expression . 
[DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) + +**Returned value** + +- DateTime/DateTime64 in text form + +**Example** + +``` sql +SELECT fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00', 3), 'Asia/Shanghai'); +``` + +Result: + +``` text +┌─fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00',3),'Asia/Shanghai')─┐ +│ 2023-03-16 18:00:00.000 │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + ## Related content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index 73d72aa50e5..d62cd1db88d 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -18,7 +18,7 @@ file(path[, default]) **Arguments** -- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. +- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. - `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index d57b799e94c..bde2a8a9505 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -92,6 +92,50 @@ Result: └───┘ ``` +## isZeroOrNull + +Returns whether the argument is 0 (zero) or [NULL](../../sql-reference/syntax.md#null-literal). + +``` sql +isZeroOrNull(x) +``` + +**Arguments:** + +- `x` — A value of non-compound data type. + +**Returned value** + +- `1` if `x` is 0 (zero) or `NULL`. +- `0` else. + +**Example** + +Table: + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 0 │ +│ 3 │ 3 │ +└───┴──────┘ +``` + +Query: + +``` sql +SELECT x FROM t_null WHERE isZeroOrNull(y); +``` + +Result: + +``` text +┌─x─┐ +│ 1 │ +│ 2 │ +└───┘ +``` + ## coalesce Returns the leftmost non-`NULL` argument. diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 06097d92480..556fe622c27 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 (#hash_functions-siphash64) +## sipHash64 {#hash_functions-siphash64} Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. @@ -63,9 +63,9 @@ This is a cryptographic hash function. 
It works at least three times faster than The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: -1. The first and the second hash value are concatenated to an array which is hashed. -2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. -3. This calculation is repeated for all remaining hash values of the original input. +1. The first and the second hash value are concatenated to an array which is hashed. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. +3. This calculation is repeated for all remaining hash values of the original input. **Arguments** diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index bb127a939f3..3e0458d226d 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -204,7 +204,7 @@ Other possible results: Query: ```sql -SELECT detectLanguageMixed('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); +SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); ``` Result: diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 26dcccfd42e..bfbd26551d3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2552,3 +2552,187 @@ Result: This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables. +## structureToCapnProtoSchema {#structure_to_capn_proto_schema} + +Converts ClickHouse table structure to CapnProto schema. + +**Syntax** + +``` sql +structureToCapnProtoSchema(structure) +``` + +**Arguments** + +- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`. +- `root_struct_name` — Name for root struct in CapnProto schema. Default value - `Message`; + +**Returned value** + +- CapnProto schema + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Examples** + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB +``` + +Result: + +``` text +@0xf96402dd754d0eb7; + +struct Message +{ + column1 @0 : Data; + column2 @1 : UInt32; + column3 @2 : List(Data); +} +``` + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB +``` + +Result: + +``` text +@0xd1c8320fecad2b7f; + +struct Message +{ + struct Column1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + column1 @0 : Column1; + struct Column2 + { + element1 @0 : UInt32; + element2 @1 : List(Data); + } + column2 @1 : Column2; + struct Column3 + { + struct Entry + { + key @0 : Data; + value @1 : Data; + } + entries @0 : List(Entry); + } + column3 @2 : Column3; +} +``` + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB +``` + +Result: + +``` text +@0x96ab2d4ab133c6e1; + +struct Root +{ + column1 @0 : Data; + column2 @1 : UInt32; +} +``` + +## structureToProtobufSchema {#structure_to_protobuf_schema} + +Converts ClickHouse table structure to Protobuf schema. + +**Syntax** + +``` sql +structureToProtobufSchema(structure) +``` + +**Arguments** + +- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`. +- `root_message_name` — Name for root message in Protobuf schema. Default value - `Message`; + +**Returned value** + +- Protobuf schema + +Type: [String](../../sql-reference/data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Message +{ + bytes column1 = 1; + uint32 column2 = 2; + repeated bytes column3 = 3; +} +``` + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Message +{ + bytes column1 = 1; + message Column2 + { + uint32 element1 = 1; + repeated bytes element2 = 2; + } + Column2 column2 = 2; + map column3 = 3; +} +``` + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Root +{ + bytes column1 = 1; + uint32 column2 = 2; +} +``` diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 7336e53fc24..c88643ef7cf 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -33,6 +33,13 @@ Returns an array of selected substrings. Empty substrings may be selected when: Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +:::note +The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings` > 0 meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. 
+For example, +- in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']` +- in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']` +::: + **Example** ``` sql @@ -63,7 +70,6 @@ splitByString(separator, s[, max_substrings])) - `s` — The string to split. [String](../../sql-reference/data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. - **Returned value(s)** Returns an array of selected substrings. Empty substrings may be selected when: diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 9890d257e84..47e16b67643 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -575,6 +575,42 @@ Alias: Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +## substringIndex(s, delim, count) + +Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL. + +**Syntax** + +```sql +substringIndex(s, delim, count) +``` +Alias: `SUBSTRING_INDEX` + + +**Arguments** + +- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). +- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) + +**Example** + +``` sql +SELECT substringIndex('www.clickhouse.com', '.', 2) +``` + +Result: +``` +┌─substringIndex('www.clickhouse.com', '.', 2)─┐ +│ www.clickhouse │ +└──────────────────────────────────────────────┘ +``` + +## substringIndexUTF8(s, delim, count) + +Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + ## appendTrailingCharIfAbsent Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`. @@ -693,6 +729,30 @@ Returns whether string `str` ends with `suffix`. endsWith(str, suffix) ``` +## endsWithUTF8 + +Returns whether string `str` ends with `suffix`, the difference between `endsWithUTF8` and `endsWith` is that `endsWithUTF8` match `str` and `suffix` by UTF-8 characters. + +**Syntax** + +```sql +endsWithUTF8(str, suffix) +``` + +**Example** + +``` sql +SELECT endsWithUTF8('中国', '\xbd'), endsWith('中国', '\xbd') +``` + +Result: + +```result +┌─endsWithUTF8('中国', '½')─┬─endsWith('中国', '½')─┐ +│ 0 │ 1 │ +└──────────────────────────┴──────────────────────┘ +``` + ## startsWith Returns whether string `str` starts with `prefix`. @@ -709,6 +769,25 @@ startsWith(str, prefix) SELECT startsWith('Spider-Man', 'Spi'); ``` +## startsWithUTF8 + +Returns whether string `str` starts with `prefix`, the difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` match `str` and `suffix` by UTF-8 characters. 
+ + +**Example** + +``` sql +SELECT startsWithUTF8('中国', '\xe4'), startsWith('中国', '\xe4') +``` + +Result: + +```result +┌─startsWithUTF8('中国', '⥩─┬─startsWith('中国', '⥩─┐ +│ 0 │ 1 │ +└────────────────────────────┴────────────────────────┘ +``` + ## trim Removes the specified characters from the start or end of a string. If not specified otherwise, the function removes whitespace (ASCII-character 32). @@ -1151,6 +1230,42 @@ Result: < Σ > ``` +## decodeHTMLComponent + +Un-escapes substrings with special meaning in HTML. For example: `ℏ` `>` `♦` `♥` `<` etc. + +This function also replaces numeric character references with Unicode characters. Both decimal (like `✓`) and hexadecimal (`✓`) forms are supported. + +**Syntax** + +``` sql +decodeHTMComponent(x) +``` + +**Arguments** + +- `x` — An input string. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- The un-escaped string. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +``` sql +SELECT decodeHTMLComponent(''CH'); +SELECT decodeHMLComponent('I♥ClickHouse'); +``` + +Result: + +```result +'CH' +I♥ClickHouse' +``` + ## extractTextFromHTML This function extracts plain text from HTML or XHTML. diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 7ed2deaeda6..88e4ac03fdb 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -559,6 +559,29 @@ Result: └────────────────────────────┘ ``` +## tupleConcat + +Combines tuples passed as arguments. + +``` sql +tupleConcat(tuples) +``` + +**Arguments** + +- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. + +**Example** + +``` sql +SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res +``` + +``` text +┌─res──────────────────┐ +│ (1,2,3,4,true,false) │ +└──────────────────────┘ +``` ## Distance functions diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index c2bd525c483..0df72b5818c 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -892,16 +892,29 @@ Query: ``` sql SELECT - now() AS now_local, - toString(now(), 'Asia/Yekaterinburg') AS now_yekat; + now() AS ts, + time_zone, + toString(ts, time_zone) AS str_tz_datetime +FROM system.time_zones +WHERE time_zone LIKE 'Europe%' +LIMIT 10 ``` Result: ```response -┌───────────now_local─┬─now_yekat───────────┐ -│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │ -└─────────────────────┴─────────────────────┘ +┌──────────────────ts─┬─time_zone─────────┬─str_tz_datetime─────┐ +│ 2023-09-08 19:14:59 │ Europe/Amsterdam │ 2023-09-08 21:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Andorra │ 2023-09-08 21:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Astrakhan │ 2023-09-08 23:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Athens │ 2023-09-08 22:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Belfast │ 2023-09-08 20:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Belgrade │ 2023-09-08 21:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Berlin │ 2023-09-08 21:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Bratislava │ 2023-09-08 21:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Brussels │ 2023-09-08 21:14:59 │ +│ 2023-09-08 19:14:59 │ Europe/Bucharest │ 2023-09-08 22:14:59 │ +└─────────────────────┴───────────────────┴─────────────────────┘ ``` Also see the `toUnixTimestamp` function. 
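A small illustrative sketch of that relationship (the timestamp and time zones below are arbitrary): `toUnixTimestamp` returns the same value no matter which zone is used for rendering, while `toString` with a time-zone argument changes only the textual representation.

``` sql
SELECT
    toUnixTimestamp(toDateTime('2023-09-08 19:14:59', 'UTC'))              AS ts_unix,   -- zone-independent
    toString(toDateTime('2023-09-08 19:14:59', 'UTC'), 'Europe/Amsterdam') AS amsterdam,
    toString(toDateTime('2023-09-08 19:14:59', 'UTC'), 'Asia/Tokyo')       AS tokyo;
```

Expected result (illustrative):

```response
┌────ts_unix─┬─amsterdam───────────┬─tokyo───────────────┐
│ 1694200499 │ 2023-09-08 21:14:59 │ 2023-09-09 04:14:59 │
└────────────┴─────────────────────┴─────────────────────┘
```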
@@ -945,6 +958,44 @@ Result: └────────────┴───────┘ ``` +## toDecimalString + +Converts a numeric value to String with the number of fractional digits in the output specified by the user. + +**Syntax** + +``` sql +toDecimalString(number, scale) +``` + +**Parameters** + +- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). + * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. + +**Returned value** + +- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). + The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. + +**Example** + +Query: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Result: + +```response +┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) ## reinterpretAsInt(8\|16\|32\|64) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index dae2c7dd1d3..6ceb9b5849e 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC Syntax: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Example** diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index cc49c6abf80..f6fb179d969 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -57,3 +57,9 @@ Output of a removed comment: │ │ └─────────┘ ``` + +**Caveats** + +For Replicated tables, the comment can be different on different replicas. Modifying the comment applies to a single replica. + +The feature is available since version 23.9. It does not work in previous ClickHouse versions. diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7dadc2be5b2..dca34d16f25 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control: [ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not. +[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md). 
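To illustrate the per-replica comment caveat noted above, a hedged sketch follows; the table and cluster names are hypothetical, and `db.replicated_table` is assumed to be an existing `Replicated*MergeTree` table.

```sql
-- Changes the comment only on the replica that executes the statement.
ALTER TABLE db.replicated_table MODIFY COMMENT 'updated on this replica only';

-- Runs the same DDL on every host of the (hypothetical) cluster,
-- which is one way to keep the comments aligned across replicas.
ALTER TABLE db.replicated_table ON CLUSTER my_cluster MODIFY COMMENT 'updated everywhere';
```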
+ ## Mutations `ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md new file mode 100644 index 00000000000..ac6752127c1 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/named-collection.md @@ -0,0 +1,30 @@ +--- +slug: /en/sql-reference/statements/alter/named-collection +sidebar_label: NAMED COLLECTION +--- + +# ALTER NAMED COLLECTION + +This query intends to modify already existing named collections. + +**Syntax** + +```sql +ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster] +[ SET +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... ] | +[ DELETE key_name4, key_name5, ... ] +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; + +ALTER NAMED COLLECTION foobar SET a = '2', c = '3'; + +ALTER NAMED COLLECTION foobar DELETE b; +``` diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index fb438927089..428f4ea9aec 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -22,7 +22,7 @@ You can see more technical details about how projections work internally on this ## Example filtering without using primary keys Creating the table: -``` +```sql CREATE TABLE visits_order ( `user_id` UInt64, @@ -34,7 +34,7 @@ ENGINE = MergeTree() PRIMARY KEY user_agent ``` Using `ALTER TABLE`, we could add the Projection to an existing table: -``` +```sql ALTER TABLE visits_order ADD PROJECTION user_name_projection ( SELECT * @@ -44,7 +44,7 @@ ORDER BY user_name ALTER TABLE visits_order MATERIALIZE PROJECTION user_name_projection ``` Inserting the data: -``` +```sql INSERT INTO visits_order SELECT number, 'test', @@ -55,7 +55,7 @@ FROM numbers(1, 100); The Projection will allow us to filter by `user_name` fast even if in the original Table `user_name` was not defined as a `PRIMARY_KEY`. At query time ClickHouse determined that less data will be processed if the projection is used, as the data is ordered by `user_name`. -``` +```sql SELECT * FROM visits_order @@ -64,14 +64,14 @@ LIMIT 2 ``` To verify that a query is using the projection, we could review the `system.query_log` table. 
On the `projections` field we have the name of the projection used or empty if none has been used: -``` +```sql SELECT query, projections FROM system.query_log WHERE query_id='' ``` ## Example pre-aggregation query Creating the table with the Projection: -``` +```sql CREATE TABLE visits ( `user_id` UInt64, @@ -90,7 +90,7 @@ ENGINE = MergeTree() ORDER BY user_agent ``` Inserting the data: -``` +```sql INSERT INTO visits SELECT number, 'test', @@ -98,7 +98,7 @@ INSERT INTO visits SELECT 'Android' FROM numbers(1, 100); ``` -``` +```sql INSERT INTO visits SELECT number, 'test', @@ -107,7 +107,7 @@ INSERT INTO visits SELECT FROM numbers(100, 500); ``` We will execute a first query using `GROUP BY` using the field `user_agent`, this query will not use the projection defined as the pre-aggregation does not match. -``` +```sql SELECT user_agent, count(DISTINCT user_id) @@ -116,7 +116,7 @@ GROUP BY user_agent ``` To use the projection we could execute queries that select part of, or all of the pre-aggregation and `GROUP BY` fields. -``` +```sql SELECT user_agent FROM visits @@ -132,7 +132,7 @@ GROUP BY user_agent ``` As mentioned before, we could review the `system.query_log` table. On the `projections` field we have the name of the projection used or empty if none has been used: -``` +```sql SELECT query, projections FROM system.query_log WHERE query_id='' ``` diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 14e29d051d7..fa39526a53e 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,14 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](/docs/en/sql-reference/statements/create/database.md) +- [TABLE](/docs/en/sql-reference/statements/create/table.md) +- [VIEW](/docs/en/sql-reference/statements/create/view.md) +- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md) +- [FUNCTION](/docs/en/sql-reference/statements/create/function.md) +- [USER](/docs/en/sql-reference/statements/create/user.md) +- [ROLE](/docs/en/sql-reference/statements/create/role.md) +- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md) +- [QUOTA](/docs/en/sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md) +- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md) diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md new file mode 100644 index 00000000000..1fc7b11c554 --- /dev/null +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -0,0 +1,34 @@ +--- +slug: /en/sql-reference/statements/create/named-collection +sidebar_label: NAMED COLLECTION +--- + +# 
CREATE NAMED COLLECTION + +Creates a new named collection. + +**Syntax** + +```sql +CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +``` + +**Related statements** + +- [CREATE NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection) +- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function) + + +**See Also** + +- [Named collections guide](/docs/en/operations/named-collections.md) diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index c69285171ab..a6ced870c18 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -11,6 +11,7 @@ Syntax: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] + [IN access_storage_type] [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 9b14e220e1f..4b6fffe4f60 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -11,6 +11,7 @@ Syntax: ``` sql CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] + [IN access_storage_type] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...] ``` diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 83bb2e6bb9a..cd7718793bd 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -16,6 +16,7 @@ Syntax: ``` sql CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.* [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...] + [IN access_storage_type] [FOR SELECT] USING condition [AS {PERMISSIVE | RESTRICTIVE}] [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}] diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index 8e221a4d82f..d8afce9d6de 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -12,6 +12,7 @@ Syntax: ``` sql CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] + [IN access_storage_type] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...] 
``` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 1a72f89fb1f..5e94a5fdc6f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -391,15 +391,19 @@ DEFLATE_QPL is not available in ClickHouse Cloud. ### Specialized Codecs -These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. +These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themself, they instead preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate. #### Delta -`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. +`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Delta is a data preparation codec, i.e. it cannot be used stand-alone. #### DoubleDelta -`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). DoubleDelta is a data preparation codec, i.e. it cannot be used stand-alone. + +#### GCD + +`GCD()` - - Calculates the greatest common denominator (GCD) of the values in the column, then divides each value by the GCD. 
Can be used with integer, decimal and date/time columns. The codec is well suited for columns with values that change (increase or decrease) in multiples of the GCD, e.g. 24, 28, 16, 24, 8, 24 (GCD = 4). GCD is a data preparation codec, i.e. it cannot be used stand-alone. #### Gorilla diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d168be63c36..6f07630298a 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -14,6 +14,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + [IN access_storage_type] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] @@ -39,6 +40,32 @@ There are multiple ways of user identification: - `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` - `IDENTIFIED BY 'qwerty'` +Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule. + +```xml + + + + .{12} + be at least 12 characters long + + + \p{N} + contain at least 1 numeric character + + + +``` + +:::note +In ClickHouse Cloud, by default, passwords must meet the following complexity requirements: +- Be at least 12 characters long +- Contain at least 1 numeric character +- Contain at least 1 uppercase character +- Contain at least 1 lowercase character +- Contain at least 1 special character +::: + ## Examples 1. The following username is `name1` and does not require a password - which obviously doesn't provide much security: diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index b6208c2fd52..eed76dbcd5c 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -49,7 +49,7 @@ Deletes a user. Syntax: ``` sql -DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROLE @@ -59,7 +59,7 @@ Deletes a role. The deleted role is revoked from all the entities where it was a Syntax: ``` sql -DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROW POLICY @@ -69,7 +69,7 @@ Deletes a row policy. Deleted row policy is revoked from all the entities where Syntax: ``` sql -DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] +DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP QUOTA @@ -79,7 +79,7 @@ Deletes a quota. The deleted quota is revoked from all the entities where it was Syntax: ``` sql -DROP QUOTA [IF EXISTS] name [,...] 
[ON CLUSTER cluster_name] +DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP SETTINGS PROFILE @@ -89,7 +89,7 @@ Deletes a settings profile. The deleted settings profile is revoked from all the Syntax: ``` sql -DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP VIEW @@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster] CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b; DROP FUNCTION linear_equation; ``` + +## DROP NAMED COLLECTION + +Deletes a named collection. + +**Syntax** + +``` sql +DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster] +``` + +**Example** + +``` sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +DROP NAMED COLLECTION foobar; +``` diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index d6e30827f9b..e0cc98c2351 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ Inserts data into a table. **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -107,7 +107,7 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Columns are mapped according to their position in the SELECT clause. However, their names in the SELECT expression and the table for INSERT may differ. If necessary, type casting is performed. @@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. diff --git a/docs/en/sql-reference/statements/move.md b/docs/en/sql-reference/statements/move.md new file mode 100644 index 00000000000..fac738ff711 --- /dev/null +++ b/docs/en/sql-reference/statements/move.md @@ -0,0 +1,32 @@ +--- +slug: /en/sql-reference/statements/move +sidebar_position: 54 +sidebar_label: MOVE +--- + +# MOVE access entity statement + +This statement allows to move an access entity from one access storage to another. + +Syntax: + +```sql +MOVE {USER, ROLE, QUOTA, SETTINGS PROFILE, ROW POLICY} name1 [, name2, ...] 
TO access_storage_type +``` + +Currently, there are five access storages in ClickHouse: + - `local_directory` + - `memory` + - `replicated` + - `users_xml` (ro) + - `ldap` (ro) + +Examples: + +```sql +MOVE USER test TO local_directory +``` + +```sql +MOVE ROLE test TO memory +``` diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 45d336c42f2..49843eaff9a 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -94,8 +94,10 @@ Result: │ 1 │ 1 │ 3 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` +All following examples are executed against this state with 5 rows. -When columns for deduplication are not specified, all of them are taken into account. Row is removed only if all values in all columns are equal to corresponding values in previous row: +#### `DEDUPLICATE` +When columns for deduplication are not specified, all of them are taken into account. The row is removed only if all values in all columns are equal to corresponding values in the previous row: ``` sql OPTIMIZE TABLE example FINAL DEDUPLICATE; @@ -116,7 +118,7 @@ Result: │ 1 │ 1 │ 3 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - +#### `DEDUPLICATE BY *` When columns are specified implicitly, the table is deduplicated by all columns that are not `ALIAS` or `MATERIALIZED`. Considering the table above, these are `primary_key`, `secondary_key`, `value`, and `partition_key` columns: ```sql OPTIMIZE TABLE example FINAL DEDUPLICATE BY *; @@ -137,7 +139,7 @@ Result: │ 1 │ 1 │ 3 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - +#### `DEDUPLICATE BY * EXCEPT` Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `value`: `primary_key`, `secondary_key`, and `partition_key` columns. ``` sql @@ -158,7 +160,7 @@ Result: │ 1 │ 1 │ 2 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - +#### `DEDUPLICATE BY ` Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns: ```sql OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key; @@ -178,8 +180,8 @@ Result: │ 1 │ 1 │ 2 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - -Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns: +#### `DEDUPLICATE BY COLUMNS()` +Deduplicate by all columns matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns: ```sql OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key'); ``` diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 2863c5c0116..86a4e9639f5 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -60,9 +60,9 @@ Specifics of each optional clause are covered in separate sections, which are li If you want to include all columns in the result, use the asterisk (`*`) symbol. For example, `SELECT * FROM ...`. -### COLUMNS expression +### Dynamic column selection -To match some columns in the result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression, you can use the `COLUMNS` expression. +Dynamic column selection (also known as a COLUMNS expression) allows you to match some columns in a result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression. 
``` sql COLUMNS('regexp') diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 7971b3ba275..38922e964b8 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -163,6 +163,61 @@ Result: │ 4 │ -4 │ 4 │ └───┴────┴─────┘ ``` + +## NULL values in JOIN keys + +The NULL is not equal to any value, including itself. It means that if a JOIN key has a NULL value in one table, it won't match a NULL value in the other table. + +**Example** + +Table `A`: + +``` +┌───id─┬─name────┐ +│ 1 │ Alice │ +│ 2 │ Bob │ +│ ᴺᵁᴸᴸ │ Charlie │ +└──────┴─────────┘ +``` + +Table `B`: + +``` +┌───id─┬─score─┐ +│ 1 │ 90 │ +│ 3 │ 85 │ +│ ᴺᵁᴸᴸ │ 88 │ +└──────┴───────┘ +``` + +```sql +SELECT A.name, B.score FROM A LEFT JOIN B ON A.id = B.id +``` + +``` +┌─name────┬─score─┐ +│ Alice │ 90 │ +│ Bob │ 0 │ +│ Charlie │ 0 │ +└─────────┴───────┘ +``` + +Notice that the row with `Charlie` from table `A` and the row with score 88 from table `B` are not in the result because of the NULL value in the JOIN key. + +In case you want to match NULL values, use the `isNotDistinctFrom` function to compare the JOIN keys. + +```sql +SELECT A.name, B.score FROM A LEFT JOIN B ON isNotDistinctFrom(A.id, B.id) +``` + +``` +┌─name────┬─score─┐ +│ Alice │ 90 │ +│ Bob │ 0 │ +│ Charlie │ 88 │ +└─────────┴───────┘ +``` + ## ASOF JOIN Usage `ASOF JOIN` is useful when you need to join records that have no exact match. diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 1c399d2072b..e94718394de 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -638,3 +638,16 @@ Outputs the content of the [system.table_engines](../../operations/system-tables **See Also** - [system.table_engines](../../operations/system-tables/table_engines.md) table + +## SHOW FUNCTIONS + +``` sql +SHOW FUNCTIONS [LIKE | ILIKE ''] +``` + +Outputs the content of the [system.functions](../../operations/system-tables/functions.md) table. + +If either `LIKE` or `ILIKE` clause is specified, the query returns a list of system functions whose names match the provided ``. + +**See Also** +- [system.functions](../../operations/system-tables/functions.md) table diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 65a35f03fbe..443db7c5ac2 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -66,13 +66,13 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name ## DROP DNS CACHE -Resets ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). +Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters. ## DROP MARK CACHE -Resets the mark cache. +Clears the mark cache. ## DROP REPLICA @@ -106,22 +106,18 @@ Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica ## DROP UNCOMPRESSED CACHE -Reset the uncompressed data cache. +Clears the uncompressed data cache. 
The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache). Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size). ## DROP COMPILED EXPRESSION CACHE -Reset the compiled expression cache. +Clears the compiled expression cache. The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions). ## DROP QUERY CACHE -Resets the [query cache](../../operations/query-cache.md). - -```sql -SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name] -``` +Clears the [query cache](../../operations/query-cache.md). ## FLUSH LOGS @@ -314,6 +310,22 @@ Provides possibility to start background fetch tasks from replication queues whi SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] ``` +### STOP PULLING REPLICATION LOG + +Stops loading new entries from replication log to replication queue in a `ReplicatedMergeTree` table. + +``` sql +SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + +### START PULLING REPLICATION LOG + +Cancels `SYSTEM STOP PULLING REPLICATION LOG`. + +``` sql +SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + ### SYNC REPLICA Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds. @@ -414,3 +426,29 @@ Will do sync syscall. ```sql SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` + + +### SYSTEM STOP LISTEN + +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. + +However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. + +```sql +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. +- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause. + +### SYSTEM START LISTEN + +Allows new connections to be established on the specified protocols. + +However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. 
+ +```sql +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index 457031a2157..4b46210aa09 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -4,8 +4,9 @@ sidebar_position: 52 sidebar_label: TRUNCATE --- -# TRUNCATE Statement +# TRUNCATE Statements +## TRUNCATE TABLE ``` sql TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` @@ -21,3 +22,10 @@ You can specify how long (in seconds) to wait for inactive replicas to execute ` :::note If the `alter_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ::: + +## TRUNCATE DATABASE +``` sql +TRUNCATE DATBASE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 7bb5d892c47..59c92e1327e 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -19,7 +19,7 @@ azureBlobStorage(- connection_string|storage_account_url, container_name, blobpa - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name -- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `account_name` - if storage_account_url is used, then account name can be specified here - `account_key` - if storage_account_url is used, then account key can be specified here - `format` — The [format](../../interfaces/formats.md#formats) of the file. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md new file mode 100644 index 00000000000..20dfd35d5db --- /dev/null +++ b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md @@ -0,0 +1,47 @@ +--- +slug: /en/sql-reference/table-functions/azureBlobStorageCluster +sidebar_position: 55 +sidebar_label: azureBlobStorageCluster +title: "azureBlobStorageCluster Table Function" +--- + +Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. 
On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. +This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md). + +**Syntax** + +``` sql +azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) +``` + +**Arguments** + +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) +- `container_name` - Container name +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `account_name` - if storage_account_url is used, then account name can be specified here +- `account_key` - if storage_account_url is used, then account key can be specified here +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`). +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. 
+ +**Examples** + +Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster: + +``` sql +SELECT count(*) from azureBlobStorageCluster( + 'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64') +``` + +**See Also** + +- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md) +- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md) diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 7362c433e0e..a083c6b89a6 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst **Syntax** ``` sql -cluster('cluster_name', db.table[, sharding_key]) -cluster('cluster_name', db, table[, sharding_key]) -clusterAllReplicas('cluster_name', db.table[, sharding_key]) -clusterAllReplicas('cluster_name', db, table[, sharding_key]) +cluster(['cluster_name', db.table, sharding_key]) +cluster(['cluster_name', db, table, sharding_key]) +clusterAllReplicas(['cluster_name', db.table, sharding_key]) +clusterAllReplicas(['cluster_name', db, table, sharding_key]) ``` **Arguments** -- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers, set `default` if not specified. - `db.table` or `db`, `table` - Name of a database and a table. - `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 00917414e0c..4db9494502e 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or **Syntax** ``` sql -file(path [,format] [,structure] [,compression]) +file([path_to_archive ::] path [,format] [,structure] [,compression]) ``` **Parameters** - `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. +- `path_to_archive` - The relative path to zip/tar/7z archive. Path to archive support the same globs as `path`. - `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. + **Returned value** A table with the specified structure for reading or writing data in the specified file. 
@@ -128,6 +130,11 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); └─────────┴─────────┴─────────┘ ``` +Getting data from table in table.csv, located in archive1.zip or/and archive2.zip +``` sql +SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); +``` + ## Globs in Path Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix). diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 01b4e4f6a69..48c2381696e 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 832be46d05f..75100eeb4f3 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -17,7 +17,7 @@ hdfsCluster(cluster_name, URI, format, structure) **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md index 30db0ef00aa..fa86b436a5e 100644 --- a/docs/en/sql-reference/table-functions/iceberg.md +++ b/docs/en/sql-reference/table-functions/iceberg.md @@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure]) - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -Engine parameters can be specified using [Named Collections](../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). 
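A sketch of that approach — the collection name and keys below are assumptions following the S3-style named-collection convention (`url`, `access_key_id`, `secret_access_key`); adjust them to whatever your configuration actually defines.

``` sql
CREATE NAMED COLLECTION iceberg_conf AS
    url = 'http://test.s3.amazonaws.com/clickhouse-bucket/test_table',
    access_key_id = 'test',
    secret_access_key = 'test';

-- The table function then picks up all connection parameters from the collection.
SELECT * FROM iceberg(iceberg_conf);
```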
**Returned value** diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index fba3ea55653..59ed4bf1985 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries. ## Syntax ``` sql -remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) ``` ## Parameters @@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) The port is required for an IPv6 address. + If only specify this parameter, `db` and `table` will use `system.one` by default. + Type: [String](../../sql-reference/data-types/string.md). - `db` — Database name. Type: [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 55c825b8b9b..07addafcf58 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index d5bdc85f9f8..675aef54d34 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [, **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). 
- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 2ab43f1b895..859de86f019 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. **See Also** diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 68fbfe0b22a..cb89a091d68 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional --- # Transactional (ACID) support -INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID): -- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed. +## Case 1: INSERT into one partition, of one table, of the MergeTree* family + +This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes): +- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted. - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted. -- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen; -- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). 
-* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own; -* INSERT into multiple tables with one statement is possible if materialized views are involved; -* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional; -* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable; -* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting; -* max_insert_block_size is 1 000 000 by default and can be adjusted as needed; -* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties; -* ClickHouse is using MVCC with snapshot isolation internally; -* all ACID properties are valid even in case of server kill / crash; -* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup; -* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) -* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. +- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen. +- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). +- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views). + +## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family + +Same as Case 1 above, with this detail: +- If the table has many partitions and the INSERT covers many partitions, then insertion into every partition is transactional on its own + + +## Case 3: INSERT into one distributed table of the MergeTree* family + +Same as Case 1 above, with this detail: +- INSERT into a Distributed table is not transactional as a whole, while insertion into every shard is transactional + +## Case 4: Using a Buffer table + +- INSERT into Buffer tables is neither atomic nor isolated nor consistent nor durable + +## Case 5: Using async_insert + +Same as Case 1 above, with this detail: +- Atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
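A sketch of Case 5 above (the table name and values are hypothetical): with `wait_for_async_insert = 1`, the default, an asynchronous INSERT keeps the atomicity guarantee; setting it to 0 gives that guarantee up.

``` sql
-- Atomicity is preserved because the client waits for the buffered data to be flushed.
INSERT INTO my_table SETTINGS async_insert = 1, wait_for_async_insert = 1
VALUES (1, 'a'), (2, 'b');
```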
+ +## Notes +- rows inserted from the client in some data format are packed into a single block when: + - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (enabled by default) + - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data +- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc) +- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties +- ClickHouse uses MVCC with snapshot isolation internally +- all ACID properties are valid even in the case of server kill/crash +- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup +- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) +- this explanation does not cover a new transactions feature that allows full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback) ## Transactions, Commit, and Rollback diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index a4b99987d03..1a6fbade06f 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -156,9 +156,9 @@ ClickHouse имеет сильную типизацию, поэтому нет Мы поддерживаем полную обратную и прямую совместимость для TCP интерфейса: старые клиенты могут общаться с новыми серверами, а новые клиенты могут общаться со старыми серверами. Но мы не хотим поддерживать его вечно и прекращаем поддержку старых версий примерно через год. - :::note "Note" - Для всех сторонних приложений мы рекомендуем использовать HTTP интерфейс, потому что он прост и удобен в использовании. TCP интерфейс тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных и использует специальное кадрирование для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что потребовала бы линковки большей части кодовой базы ClickHouse, что непрактично. - ::: +:::note Примечание +Для всех сторонних приложений мы рекомендуем использовать HTTP интерфейс, потому что он прост и удобен в использовании. TCP интерфейс тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных и использует специальное кадрирование для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что потребовала бы линковки большей части кодовой базы ClickHouse, что непрактично. +::: ## Выполнение распределенных запросов (Distributed Query Execution) {#distributed-query-execution} Сервера в кластере в основном независимы. Вы можете создать `Распределенную` (`Distributed`) таблицу на одном или всех серверах в кластере. Такая таблица сама по себе не хранит данные - она только предоставляет возможность "просмотра" всех локальных таблиц на нескольких узлах кластера.
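Referring back to the Notes on insert block sizes above: a quick way to inspect the thresholds mentioned there on a running server (a sketch; the setting list simply mirrors the Notes).

``` sql
SELECT name, value
FROM system.settings
WHERE name IN ('max_insert_block_size', 'min_chunk_bytes_for_parallel_parsing',
               'min_insert_block_size_rows', 'min_insert_block_size_bytes');
```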
При выполнении `SELECT` распределенная таблица переписывает запрос, выбирает удаленные узлы в соответствии с настройками балансировки нагрузки и отправляет им запрос. Распределенная таблица просит удаленные сервера обработать запрос до той стадии, когда промежуточные результаты с разных серверов могут быть объединены. Затем он получает промежуточные результаты и объединяет их. Распределенная таблица пытается возложить как можно больше работы на удаленные серверы и сократить объем промежуточных данных, передаваемых по сети. @@ -197,7 +197,8 @@ ClickHouse имеет сильную типизацию, поэтому нет Кроме того, каждая реплика сохраняет свое состояние в `ZooKeeper` в виде набора частей и его контрольных сумм. Когда состояние в локальной файловой системе расходится с эталонным состоянием в `ZooKeeper`, реплика восстанавливает свою согласованность путем загрузки отсутствующих и поврежденных частей из других реплик. Когда в локальной файловой системе есть неожиданные или испорченные данные, ClickHouse не удаляет их, а перемещает в отдельный каталог и забывает об этом. - :::note "Note" - Кластер ClickHouse состоит из независимых шардов, а каждый шард состоит из реплик. Кластер **не является эластичным** (not elastic), поэтому после добавления нового шарда данные не будут автоматически распределены между ними. Вместо этого нужно изменить настройки, чтобы выровнять нагрузку на кластер. Эта реализация дает вам больший контроль, и вполне приемлема для относительно небольших кластеров, таких как десятки узлов. Но для кластеров с сотнями узлов, которые мы используем в эксплуатации, такой подход становится существенным недостатком. Движки таблиц, которые охватывают весь кластер с динамически реплицируемыми областями, которые могут быть автоматически разделены и сбалансированы между кластерами, еще предстоит реализовать. - ::: +:::note Примечание +Кластер ClickHouse состоит из независимых шардов, а каждый шард состоит из реплик. Кластер **не является эластичным** (not elastic), поэтому после добавления нового шарда данные не будут автоматически распределены между ними. Вместо этого нужно изменить настройки, чтобы выровнять нагрузку на кластер. Эта реализация дает вам больший контроль, и вполне приемлема для относительно небольших кластеров, таких как десятки узлов. Но для кластеров с сотнями узлов, которые мы используем в эксплуатации, такой подход становится существенным недостатком. Движки таблиц, которые охватывают весь кластер с динамически реплицируемыми областями, которые могут быть автоматически разделены и сбалансированы между кластерами, еще предстоит реализовать. +::: + {## [Original article](https://clickhouse.com/docs/ru/development/architecture/) ##} diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md index 6b4e612b13f..033f42c1fd0 100644 --- a/docs/ru/development/build-osx.md +++ b/docs/ru/development/build-osx.md @@ -6,9 +6,9 @@ sidebar_label: Сборка на Mac OS X # Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x} -:::info "Вам не нужно собирать ClickHouse самостоятельно" - Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start). - Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`. +:::info Вам не нужно собирать ClickHouse самостоятельно +Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start). +Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`. 
::: Сборка должна запускаться с x86_64 (Intel) на macOS версии 10.15 (Catalina) и выше в последней версии компилятора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компиляторах. @@ -90,8 +90,8 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/ Если будете запускать `clickhouse-server`, убедитесь, что увеличили системную переменную `maxfiles`. -:::info "Note" - Вам понадобится команда `sudo`. +:::info Примечание +Вам понадобится команда `sudo`. ::: 1. Создайте файл `/Library/LaunchDaemons/limit.maxfiles.plist` и поместите в него следующее: diff --git a/docs/ru/engines/database-engines/materialized-mysql.md b/docs/ru/engines/database-engines/materialized-mysql.md index df56b7a0bd6..f019bc7e3fc 100644 --- a/docs/ru/engines/database-engines/materialized-mysql.md +++ b/docs/ru/engines/database-engines/materialized-mysql.md @@ -49,9 +49,9 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', - `default_authentication_plugin = mysql_native_password` — `MaterializedMySQL` может авторизоваться только с помощью этого метода. - `gtid_mode = on` — ведение журнала на основе GTID является обязательным для обеспечения правильной репликации. - :::note "Внимание" - При включении `gtid_mode` вы также должны указать `enforce_gtid_consistency = on`. - ::: +:::note Внимание +При включении `gtid_mode` вы также должны указать `enforce_gtid_consistency = on`. +::: ## Виртуальные столбцы {#virtual-columns} При работе с движком баз данных `MaterializedMySQL` используются таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) с виртуальными столбцами `_sign` и `_version`. diff --git a/docs/ru/engines/database-engines/materialized-postgresql.md b/docs/ru/engines/database-engines/materialized-postgresql.md index f2268e9ba0f..6dcb5a3475f 100644 --- a/docs/ru/engines/database-engines/materialized-postgresql.md +++ b/docs/ru/engines/database-engines/materialized-postgresql.md @@ -96,9 +96,10 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -:::danger "Предупреждение" - Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. - +:::danger Предупреждение +Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. +::: + ## Пример использования {#example-of-use} ``` sql diff --git a/docs/ru/engines/database-engines/mysql.md b/docs/ru/engines/database-engines/mysql.md index fb5c9d16ee4..3e3355d8348 100644 --- a/docs/ru/engines/database-engines/mysql.md +++ b/docs/ru/engines/database-engines/mysql.md @@ -60,8 +60,9 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') - `version` - `max_allowed_packet` -:::danger "Предупреждение" - В настоящее время эти переменные реализованы только как "заглушки" и не содержат актуальных данных. +:::danger Предупреждение +В настоящее время эти переменные реализованы только как "заглушки" и не содержат актуальных данных. +::: Пример: diff --git a/docs/ru/engines/database-engines/replicated.md b/docs/ru/engines/database-engines/replicated.md index 05e38e774fd..fcf2b9f4019 100644 --- a/docs/ru/engines/database-engines/replicated.md +++ b/docs/ru/engines/database-engines/replicated.md @@ -12,7 +12,7 @@ sidebar_label: Replicated ## Создание базы данных {#creating-a-database} ``` sql - CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] 
+CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] ``` **Параметры движка** @@ -21,9 +21,9 @@ sidebar_label: Replicated - `shard_name` — Имя шарда. Реплики базы данных группируются в шарды по имени. - `replica_name` — Имя реплики. Имена реплик должны быть разными для всех реплик одного и того же шарда. - :::note "Предупреждение" - Для таблиц [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) если аргументы не заданы, то используются аргументы по умолчанию: `/clickhouse/tables/{uuid}/{shard}` и `{replica}`. Они могут быть изменены в серверных настройках: [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) и [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Макрос `{uuid}` раскрывается в `UUID` таблицы, `{shard}` и `{replica}` — в значения из конфига сервера. В будущем появится возможность использовать значения `shard_name` и `replica_name` аргументов движка базы данных `Replicated`. - ::: +:::note Предупреждение +Для таблиц [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) если аргументы не заданы, то используются аргументы по умолчанию: `/clickhouse/tables/{uuid}/{shard}` и `{replica}`. Они могут быть изменены в серверных настройках: [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) и [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Макрос `{uuid}` раскрывается в `UUID` таблицы, `{shard}` и `{replica}` — в значения из конфига сервера. В будущем появится возможность использовать значения `shard_name` и `replica_name` аргументов движка базы данных `Replicated`. +::: ## Особенности и рекомендации {#specifics-and-recommendations} DDL-запросы с базой данных `Replicated` работают похожим образом на [ON CLUSTER](../../sql-reference/distributed-ddl.md) запросы, но с небольшими отличиями. diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index aed90d53f2a..72087b56652 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -97,8 +97,9 @@ CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = HDFS( CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -:::danger "Warning" - Если список файлов содержит числовые интервалы с ведущими нулями, используйте конструкцию с фигурными скобочками для каждой цифры или используйте `?`. +:::danger Предупреждение +Если список файлов содержит числовые интервалы с ведущими нулями, используйте конструкцию с фигурными скобочками для каждой цифры или используйте `?`. +::: **Example** diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 832486c038a..18f6c7cd1f9 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -102,7 +102,7 @@ SETTINGS Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. 
::: diff --git a/docs/ru/engines/table-engines/integrations/materialized-postgresql.md b/docs/ru/engines/table-engines/integrations/materialized-postgresql.md index bc10066f6a6..c1f7fde1ab4 100644 --- a/docs/ru/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/ru/engines/table-engines/integrations/materialized-postgresql.md @@ -52,5 +52,6 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -:::danger "Предупреждение" - Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. +:::danger Предупреждение +Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. +::: diff --git a/docs/ru/engines/table-engines/integrations/postgresql.md b/docs/ru/engines/table-engines/integrations/postgresql.md index d4fc9470a0e..4f3654baaf2 100644 --- a/docs/ru/engines/table-engines/integrations/postgresql.md +++ b/docs/ru/engines/table-engines/integrations/postgresql.md @@ -48,8 +48,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] PostgreSQL массивы конвертируются в массивы ClickHouse. -:::info "Внимание" - Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. +:::info Внимание +Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. ::: Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например: diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 24735a35382..720aa589122 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -68,8 +68,8 @@ SELECT * FROM s3_engine_table LIMIT 2; Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md). -:::danger "Примечание" - Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +:::danger Примечание +Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. **Пример подстановки 1** diff --git a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md index 596926ea47d..f60735eac9f 100644 --- a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -39,7 +39,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. 
::: diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md index e3b4238a200..cfafddf0bc2 100644 --- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -43,7 +43,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. ::: diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 7c360333837..46597c94370 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -39,8 +39,8 @@ ORDER BY (CounterID, StartDate, intHash32(UserID)); Каждая партиция состоит из отдельных фрагментов или так называемых *кусков данных*. Каждый кусок отсортирован по первичному ключу. При вставке данных в таблицу каждая отдельная запись сохраняется в виде отдельного куска. Через некоторое время после вставки (обычно до 10 минут), ClickHouse выполняет в фоновом режиме слияние данных — в результате куски для одной и той же партиции будут объединены в более крупный кусок. -:::info "Info" - Не рекомендуется делать слишком гранулированное партиционирование – то есть задавать партиции по столбцу, в котором будет слишком большой разброс значений (речь идет о порядке более тысячи партиций). Это приведет к скоплению большого числа файлов и файловых дескрипторов в системе, что может значительно снизить производительность запросов `SELECT`. +:::info Примечание +Не рекомендуется делать слишком гранулированное партиционирование – то есть задавать партиции по столбцу, в котором будет слишком большой разброс значений (речь идет о порядке более тысячи партиций). Это приведет к скоплению большого числа файлов и файловых дескрипторов в системе, что может значительно снизить производительность запросов `SELECT`. ::: Чтобы получить набор кусков и партиций таблицы, можно воспользоваться системной таблицей [system.parts](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#system_tables-parts). В качестве примера рассмотрим таблицу `visits`, в которой задано партиционирование по месяцам. Выполним `SELECT` для таблицы `system.parts`: @@ -80,8 +80,8 @@ WHERE table = 'visits' - `2` – уровень куска (глубина дерева слияний, которыми этот кусок образован). - `11` - версия мутации (если парт мутировал) -:::info "Info" - Названия кусков для таблиц старого типа образуются следующим образом: `20190117_20190123_2_2_0` (минимальная дата _ максимальная дата _ номер минимального блока _ номер максимального блока _ уровень). +:::info Примечание +Названия кусков для таблиц старого типа образуются следующим образом: `20190117_20190123_2_2_0` (минимальная дата _ максимальная дата _ номер минимального блока _ номер максимального блока _ уровень). ::: Как видно из примера выше, таблица содержит несколько отдельных кусков для одной и той же партиции (например, куски `201901_1_3_1` и `201901_1_9_2` принадлежат партиции `201901`). Это означает, что эти куски еще не были объединены – в файловой системе они хранятся отдельно. 
После того как будет выполнено автоматическое слияние данных (выполняется примерно спустя 10 минут после вставки данных), исходные куски будут объединены в один более крупный кусок и помечены как неактивные. diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md index 324a3fd1633..ff77a29767d 100644 --- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md @@ -55,7 +55,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. ::: @@ -123,12 +123,13 @@ default ... ``` -:::danger "Внимание" - Правила должны быть строго упорядочены: +:::danger Внимание +Правила должны быть строго упорядочены: - 1. Правила без `function` или `retention`. - 1. Правила одновремено содержащие `function` и `retention`. - 1. Правило `default`. +1. Правила без `function` или `retention`. +1. Правила одновремено содержащие `function` и `retention`. +1. Правило `default`. +::: При обработке строки ClickHouse проверяет правила в разделе `pattern`. Каждый `pattern` (включая `default`) может содержать параметр агрегации `function`, параметр `retention`, или оба параметра одновременно. Если имя метрики соответствует шаблону `regexp`, то применяются правила `pattern`, в противном случае правило `default`. @@ -257,5 +258,6 @@ default ``` -:::danger "Внимание" - Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). +:::danger Внимание +Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). +::: diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 812b0c0a2d4..1b0dab2b3a5 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -20,9 +20,9 @@ sidebar_label: MergeTree - **Поддерживает сэмплирование данных.** При необходимости можно задать способ сэмплирования данных в таблице. - :::info - Движок [Merge](../special/merge.md#merge) не относится к семейству `*MergeTree`. - ::: +:::info Примечание +Движок [Merge](../special/merge.md#merge) не относится к семейству `*MergeTree`. +::: ## Создание таблицы {#table_engine-mergetree-creating-a-table} ``` sql @@ -115,7 +115,7 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше. 
::: diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index 86626d92863..cd8b55b0259 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -29,7 +29,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Описание параметров запроса смотрите в [описании запроса](../../../engines/table-engines/mergetree-family/replacingmergetree.md). -:::warning "Внимание" +:::warning Внимание Уникальность строк определяется `ORDER BY` секцией таблицы, а не `PRIMARY KEY`. ::: @@ -95,7 +95,7 @@ SELECT * FROM mySecondReplacingMT FINAL; Устаревший способ создания таблицы -:::warning "Внимание" +:::warning Внимание Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше. ::: diff --git a/docs/ru/engines/table-engines/mergetree-family/replication.md b/docs/ru/engines/table-engines/mergetree-family/replication.md index 2b4d89dbe0a..dd94288882c 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replication.md +++ b/docs/ru/engines/table-engines/mergetree-family/replication.md @@ -32,9 +32,9 @@ ClickHouse хранит метаинформацию о репликах в [Apa Для использовании репликации, установите параметры в секции [zookeeper](../../../operations/server-configuration-parameters/settings.md#server-settings_zookeeper) конфигурации сервера. - :::note "Внимание" - Не пренебрегайте настройками безопасности. ClickHouse поддерживает [ACL схему](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) `digest` подсистемы безопасности ZooKeeper. - ::: +:::note Внимание +Не пренебрегайте настройками безопасности. ClickHouse поддерживает [ACL схему](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) `digest` подсистемы безопасности ZooKeeper. +::: Пример указания адресов кластера ZooKeeper: ``` xml diff --git a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md index abbcfcc5be2..a88de9d1e26 100644 --- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md @@ -42,7 +42,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. ::: diff --git a/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 1cc5752f04b..cc1bf09e564 100644 --- a/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -54,10 +54,10 @@ VersionedCollapsingMergeTree(sign, version) Устаревший способ создания таблицы - :::danger "Внимание" - Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. - ::: - +:::danger Внимание +Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. 
+::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 574d9273088..1fd8483e54d 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -9,7 +9,7 @@ sidebar_label: Buffer Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы. ``` sql -Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) +Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) ``` Параметры движка: @@ -49,9 +49,9 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен. То же самое происходит, если подчинённая таблица не существует в момент сброса буфера. - :::note "Внимание" - В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена. - ::: +:::note Внимание +В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена. +::: При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны. Для таблиц типа Buffer неправильно работают FINAL и SAMPLE. Эти условия пробрасываются в таблицу назначения, но не используются для обработки данных в буфере. В связи с этим, рекомендуется использовать таблицу типа Buffer только для записи, а читать из таблицы назначения. diff --git a/docs/ru/engines/table-engines/special/distributed.md b/docs/ru/engines/table-engines/special/distributed.md index 341acf92803..0c445791704 100644 --- a/docs/ru/engines/table-engines/special/distributed.md +++ b/docs/ru/engines/table-engines/special/distributed.md @@ -141,9 +141,10 @@ logs - имя кластера в конфигурационном файле с - `_shard_num` — содержит значение `shard_num` из таблицы `system.clusters`. Тип: [UInt32](../../../sql-reference/data-types/int-uint.md). - :::note "Примечание" - Так как табличные функции [remote](../../../sql-reference/table-functions/remote.md) и [cluster](../../../sql-reference/table-functions/cluster.md) создают временную таблицу на движке `Distributed`, то в ней также доступен столбец `_shard_num`. - ::: +:::note Примечание +Так как табличные функции [remote](../../../sql-reference/table-functions/remote.md) и [cluster](../../../sql-reference/table-functions/cluster.md) создают временную таблицу на движке `Distributed`, то в ней также доступен столбец `_shard_num`. +::: + **См. 
также** - общее описание [виртуальных столбцов](../../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/ru/engines/table-engines/special/file.md b/docs/ru/engines/table-engines/special/file.md index fb65c93c48c..2c5d998fd0b 100644 --- a/docs/ru/engines/table-engines/special/file.md +++ b/docs/ru/engines/table-engines/special/file.md @@ -28,8 +28,9 @@ File(Format) Можно вручную создать в хранилище каталог таблицы, поместить туда файл, затем на сервере ClickHouse добавить ([ATTACH](../../../sql-reference/statements/attach.md#attach)) информацию о таблице, соответствующей имени каталога и прочитать из файла данные. -:::danger "Warning" - Будьте аккуратны с этой функциональностью, поскольку сервер ClickHouse не отслеживает внешние изменения данных. Если в файл будет производиться запись одновременно со стороны сервера ClickHouse и с внешней стороны, то результат непредсказуем. +:::danger Предупреждение +Будьте аккуратны с этой функциональностью, поскольку сервер ClickHouse не отслеживает внешние изменения данных. Если в файл будет производиться запись одновременно со стороны сервера ClickHouse и с внешней стороны, то результат непредсказуем. +::: **Пример:** diff --git a/docs/ru/faq/general/dbms-naming.md b/docs/ru/faq/general/dbms-naming.md index 875d24dea2a..32f681aaef9 100644 --- a/docs/ru/faq/general/dbms-naming.md +++ b/docs/ru/faq/general/dbms-naming.md @@ -13,6 +13,6 @@ sidebar_position: 10 - единственно правильный способ написания — Click**H**ouse — с заглавной буквой H; - если нужно сокращеннное название, используйте **CH**. Исторически сложилось, что в Китае также популярно сокращение CK — в основном, из-за того, что это название использовалось в одном из первых обсуждений ClickHouse на китайском языке. -:::info "Забавный факт" - Спустя годы после того, как ClickHouse получил свое название, принцип комбинирования двух слов, каждое из которых имеет подходящий смысл, был признан лучшим способом назвать базу данных в [исследовании Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), Associate Professor of Databases в Carnegie Mellon University. ClickHouse разделил награду "за лучшее название СУБД" с Postgres. +:::info Забавный факт +Спустя годы после того, как ClickHouse получил свое название, принцип комбинирования двух слов, каждое из которых имеет подходящий смысл, был признан лучшим способом назвать базу данных в [исследовании Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), Associate Professor of Databases в Carnegie Mellon University. ClickHouse разделил награду "за лучшее название СУБД" с Postgres. ::: \ No newline at end of file diff --git a/docs/ru/faq/general/index.md b/docs/ru/faq/general/index.md index 6aaccbe5826..b4a2f18cb02 100644 --- a/docs/ru/faq/general/index.md +++ b/docs/ru/faq/general/index.md @@ -19,6 +19,6 @@ sidebar_label: Общие вопросы - [Что такое столбцовая база данных?](columnar-database.md) - [Почему бы не использовать системы типа MapReduce?](mapreduce.md) -:::info "Если вы не нашли то, что искали:" - Загляните в другие категории F.A.Q. или поищите в остальных разделах документации, ориентируясь по оглавлению слева. +:::info Если вы не нашли то, что искали: +Загляните в другие категории F.A.Q. или поищите в остальных разделах документации, ориентируясь по оглавлению слева. 
::: diff --git a/docs/ru/faq/general/why-clickhouse-is-so-fast.md b/docs/ru/faq/general/why-clickhouse-is-so-fast.md index 2652e93c35e..8bb8fb78e05 100644 --- a/docs/ru/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/ru/faq/general/why-clickhouse-is-so-fast.md @@ -49,16 +49,14 @@ sidebar_position: 8 Ну и последнее, но тем не менее важное условие: команда ClickHouse постоянно отслеживает в интернете сообщения пользователей о найденных ими удачных реализациях, алгоритмах или структурах данных, анализирует и пробует новые идеи. Иногда в этом потоке сообщений попадаются действительно ценные предложения. -:::info "Советы о том, как создать собственную высокопроизводительную систему" - - - - При проектировании системы обращайте внимание на мельчайшие детали реализации. - - Учитывайте возможности аппаратного обеспечения. - - Выбирайте структуры и представления данных исходя из требований конкретной задачи. - - Для особых случаев разрабатывайте специализированные решения. - - Пробуйте новые алгоритмы, о которых вы вчера прочитали в интернете. Ищите возможности для совершенствования. - - Выбирайте алгоритмы динамически, в процессе выполнения, на основе статистики. - - Ориентируйтесь на показатели, собранные при работе с реальными данными. - - Проверяйте производительность в процессе CI. - - Измеряйте и анализируйте всё, что только возможно. +:::info Советы о том, как создать собственную высокопроизводительную систему +- При проектировании системы обращайте внимание на мельчайшие детали реализации. +- Учитывайте возможности аппаратного обеспечения. +- Выбирайте структуры и представления данных исходя из требований конкретной задачи. +- Для особых случаев разрабатывайте специализированные решения. +- Пробуйте новые алгоритмы, о которых вы вчера прочитали в интернете. Ищите возможности для совершенствования. +- Выбирайте алгоритмы динамически, в процессе выполнения, на основе статистики. +- Ориентируйтесь на показатели, собранные при работе с реальными данными. +- Проверяйте производительность в процессе CI. +- Измеряйте и анализируйте всё, что только возможно. ::: diff --git a/docs/ru/faq/integration/index.md b/docs/ru/faq/integration/index.md index aa0579b9375..168da82de53 100644 --- a/docs/ru/faq/integration/index.md +++ b/docs/ru/faq/integration/index.md @@ -14,8 +14,8 @@ sidebar_label: Интеграция - [Как импортировать JSON в ClickHouse?](json-import.md) - [Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC?](oracle-odbc.md) -:::info "Если вы не нашли то, что искали" - Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. +:::info Если вы не нашли то, что искали +Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. ::: [Original article](https://clickhouse.com/docs/ru/faq/integration/) diff --git a/docs/ru/faq/integration/json-import.md b/docs/ru/faq/integration/json-import.md index a3c89aed429..230d6ab74c0 100644 --- a/docs/ru/faq/integration/json-import.md +++ b/docs/ru/faq/integration/json-import.md @@ -29,6 +29,6 @@ $ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSO - `input_format_skip_unknown_fields` позволяет импортировать JSON, даже если он содержит дополнительные поля, которых нет в таблице (отбрасывая лишние поля). - `input_format_import_nested_json` позволяет импортировать вложенные JSON-объекты в столбцы типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md). 
- :::note "Примечание" - В HTTP-интерфейсе настройки передаются через параметры `GET` запроса, в `CLI` interface — как дополнительные аргументы командной строки, начинающиеся с `--`. - ::: +:::note Примечание +В HTTP-интерфейсе настройки передаются через параметры `GET` запроса, в `CLI` interface — как дополнительные аргументы командной строки, начинающиеся с `--`. +::: diff --git a/docs/ru/faq/operations/delete-old-data.md b/docs/ru/faq/operations/delete-old-data.md index aad33ce0333..031d2881ae2 100644 --- a/docs/ru/faq/operations/delete-old-data.md +++ b/docs/ru/faq/operations/delete-old-data.md @@ -14,9 +14,10 @@ ClickHouse позволяет автоматически удалять данн Ключевое преимущество такого подхода в том, что не нужно использовать внешнюю систему, чтобы запустить процесс — когда заданы условия TTL, удаление данных выполняется автоматически в фоновом режиме. - :::note - TTL можно использовать не только для перемещения в [/dev/null](https://en.wikipedia.org/wiki/Null_device), но еще и между дисками, например, с SSD на HDD. - ::: +:::note Примечание +TTL можно использовать не только для перемещения в [/dev/null](https://en.wikipedia.org/wiki/Null_device), но еще и между дисками, например, с SSD на HDD. +::: + [Подробнее о конфигурировании TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). ## ALTER DELETE {#alter-delete} diff --git a/docs/ru/faq/operations/index.md b/docs/ru/faq/operations/index.md index c1135716f39..2dc788908fb 100644 --- a/docs/ru/faq/operations/index.md +++ b/docs/ru/faq/operations/index.md @@ -13,8 +13,8 @@ sidebar_label: Операции - [Какую версию ClickHouse использовать?](production.md) - [Возможно ли удалить старые записи из таблицы ClickHouse?](delete-old-data.md) -:::info "Если вы не нашли то, что искали" - Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. +:::info Если вы не нашли то, что искали +Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. ::: [Original article](https://clickhouse.com/docs/en/faq/operations/) diff --git a/docs/ru/faq/operations/production.md b/docs/ru/faq/operations/production.md index 5bbc0741d8c..70186a476b4 100644 --- a/docs/ru/faq/operations/production.md +++ b/docs/ru/faq/operations/production.md @@ -60,5 +60,6 @@ sidebar_position: 10 Часто компании, которые изначально ориентировались на релизы `lts`, позднее переходят на `stable`, поскольку хотят быстрее получать доступ к новым возможностям. -:::danger "Важно" - Мы всегда стремимся поддерживать совместимость релизов, но иногда это правило нарушается, и какие-то отдельные возможности в новых релизах становятся недоступны. Перед обновлением ClickHouse обязательно изучите [журнал изменений](../../whats-new/changelog/index.mdx), чтобы убедиться, что в нем нет объявлений о нарушении обратной совместимости. +:::danger Важно +Мы всегда стремимся поддерживать совместимость релизов, но иногда это правило нарушается, и какие-то отдельные возможности в новых релизах становятся недоступны. Перед обновлением ClickHouse обязательно изучите [журнал изменений](../../whats-new/changelog/index.mdx), чтобы убедиться, что в нем нет объявлений о нарушении обратной совместимости. 
+::: diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 7fe6a998a63..12d0c18c3a1 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -291,9 +291,8 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "SELECT COUNT(*) FROM datasets.trips_mergetree" ``` -:::info "Info" - Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы -нужно добавить имя базы, `datasets.trips_mergetree`. +:::info Примечание +Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы нужно добавить имя базы, `datasets.trips_mergetree`. ::: ## Результаты на одном сервере {#rezultaty-na-odnom-servere} diff --git a/docs/ru/getting-started/example-datasets/ontime.md b/docs/ru/getting-started/example-datasets/ontime.md index a3cd116217e..4e3c5a1e7e5 100644 --- a/docs/ru/getting-started/example-datasets/ontime.md +++ b/docs/ru/getting-started/example-datasets/ontime.md @@ -155,9 +155,8 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "SELECT COUNT(*) FROM datasets.ontime" ``` -:::info "Info" - Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы -нужно добавить имя базы, `datasets.ontime`. +:::info Примечание +Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы нужно добавить имя базы, `datasets.ontime`. ::: ## Запросы: {#zaprosy} diff --git a/docs/ru/getting-started/example-datasets/star-schema.md b/docs/ru/getting-started/example-datasets/star-schema.md index a37545317e3..a444a0d9ec2 100644 --- a/docs/ru/getting-started/example-datasets/star-schema.md +++ b/docs/ru/getting-started/example-datasets/star-schema.md @@ -16,9 +16,9 @@ $ make Генерация данных: -:::warning "Внимание" - -s 100 – dbgen генерирует 600 миллионов строк (67 ГБ) --s 1000 – dbgen генерирует 6 миллиардов строк (занимает много времени) +:::warning Внимание +`-s 100` – dbgen генерирует 600 миллионов строк (67 ГБ) +`-s 1000` – dbgen генерирует 6 миллиардов строк (занимает много времени) ::: ``` bash diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 479616d667b..00000000000 --- a/docs/ru/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /ru/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -См: http://dumps.wikimedia.org/other/pagecounts-raw/ - -Создание таблицы: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -Загрузка данных: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 
's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md new file mode 120000 index 00000000000..2d429d00984 --- /dev/null +++ b/docs/ru/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index f041db43d0f..63bd7ee8eb6 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -69,10 +69,10 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. - `clickhouse-client` — Создает символические ссылки для `clickhouse-client` и других клиентских инструментов и устанавливает конфигурационные файлы `clickhouse-client`. - `clickhouse-common-static-dbg` — Устанавливает исполняемые файлы ClickHouse собранные с отладочной информацией. - :::note "Внимание" - Если вам нужно установить ClickHouse определенной версии, вы должны установить все пакеты одной версии: - `sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` - ::: +:::note Внимание +Если вам нужно установить ClickHouse определенной версии, вы должны установить все пакеты одной версии: +`sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` +::: ### Из RPM пакетов {#from-rpm-packages} Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm` пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index 60a7463f70f..6ebdcda8bef 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -91,7 +91,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Import Sample Dataset {#import-sample-dataset} -Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. +Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized metric data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. ### Download and Extract Table Data {#download-and-extract-table-data} @@ -116,7 +116,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r 2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). 3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. 
-Yandex.Metrica is a web analytics service, and sample dataset doesn’t cover its full functionality, so there are only two tables to create: +There are only two tables to create: - `hits` is a table with each action done by all users on all websites covered by the service. - `visits` is a table that contains pre-built sessions instead of individual actions. @@ -523,7 +523,7 @@ SELECT sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits, (100. * goal_visits) / visits AS goal_percent FROM tutorial.visits_v1 -WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru') +WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) ``` ## Cluster Deployment {#cluster-deployment} @@ -544,19 +544,19 @@ Example config for a cluster with three shards, one replica each: - example-perftest01j.yandex.ru + example-perftest01j.clickhouse.com 9000 - example-perftest02j.yandex.ru + example-perftest02j.clickhouse.com 9000 - example-perftest03j.yandex.ru + example-perftest03j.clickhouse.com 9000 @@ -585,8 +585,9 @@ Let’s run [INSERT SELECT](../sql-reference/statements/insert-into.md) into the INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -:::danger "Notice" - This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utilities/clickhouse-copier.md) that can re-shard arbitrary large tables. +:::danger Notice +This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utilities/clickhouse-copier.md) that can re-shard arbitrary large tables. +::: As you could expect, computationally heavy queries run N times faster if they utilize 3 servers instead of one. @@ -602,15 +603,15 @@ Example config for a cluster of one shard containing three replicas: - example-perftest01j.yandex.ru + example-perftest01j.clickhouse.com 9000 - example-perftest02j.yandex.ru + example-perftest02j.clickhouse.com 9000 - example-perftest03j.yandex.ru + example-perftest03j.clickhouse.com 9000 @@ -620,23 +621,24 @@ Example config for a cluster of one shard containing three replicas: To enable native replication [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs restore procedure after failure automatically. It’s recommended to deploy the ZooKeeper cluster on separate servers (where no other processes including ClickHouse are running). - :::note "Note" - ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application. - ::: +:::note Note +ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application. 
+::: + ZooKeeper locations are specified in the configuration file: ``` xml - zoo01.yandex.ru + zoo01.clickhouse.com 2181 - zoo02.yandex.ru + zoo02.clickhouse.com 2181 - zoo03.yandex.ru + zoo03.clickhouse.com 2181 diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index aa6ae3629e8..47ab6474fc0 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -128,7 +128,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe - `--port` — порт для подключения, по умолчанию — 9000. Обратите внимание: для HTTP-интерфейса и нативного интерфейса используются разные порты. - `--user, -u` — имя пользователя, по умолчанию — ‘default’. - `--password` — пароль, по умолчанию — пустая строка. -- `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме. +- `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме. Допускается указание `--query` несколько раз (`--query "SELECT 1;" --query "SELECT 2;"...`). - `--queries-file` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`. - `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД ‘default’). - `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index e232b63f049..b4794b02743 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -772,9 +772,9 @@ CREATE TABLE IF NOT EXISTS example_table - Если `input_format_defaults_for_omitted_fields = 0`, то значение по умолчанию для `x` и `a` равняется `0` (поскольку это значение по умолчанию для типа данных `UInt32`.) - Если `input_format_defaults_for_omitted_fields = 1`, то значение по умолчанию для `x` равно `0`, а значение по умолчанию `a` равно `x * 2`. - :::note "Предупреждение" - При добавлении данных с помощью `input_format_defaults_for_omitted_fields = 1`, ClickHouse потребляет больше вычислительных ресурсов по сравнению с `input_format_defaults_for_omitted_fields = 0`. - ::: +:::note Предупреждение +При добавлении данных с помощью `input_format_defaults_for_omitted_fields = 1`, ClickHouse потребляет больше вычислительных ресурсов по сравнению с `input_format_defaults_for_omitted_fields = 0`. +::: ### Выборка данных {#vyborka-dannykh} Рассмотрим в качестве примера таблицу `UserActivity`: @@ -795,9 +795,10 @@ CREATE TABLE IF NOT EXISTS example_table В отличие от формата [JSON](#json), для `JSONEachRow` ClickHouse не заменяет невалидные UTF-8 последовательности. Значения экранируются так же, как и для формата `JSON`. - :::note "Примечание" - В строках может выводиться произвольный набор байт. Используйте формат `JSONEachRow`, если вы уверены, что данные в таблице могут быть представлены в формате JSON без потери информации. - ::: +:::note Примечание +В строках может выводиться произвольный набор байт. Используйте формат `JSONEachRow`, если вы уверены, что данные в таблице могут быть представлены в формате JSON без потери информации. +::: + ### Использование вложенных структур {#jsoneachrow-nested} Если у вас есть таблица со столбцами типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), то в неё можно вставить данные из JSON-документа с такой же структурой. Функциональность включается настройкой [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json). 
@@ -1305,9 +1306,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` - :::note "Внимание" - `format_avro_schema_registry_url` необходимо настроить в `users.xml`, чтобы сохранить значение после перезапуска. Также можно использовать настройку `format_avro_schema_registry_url` табличного движка `Kafka`. - ::: +:::note Внимание +`format_avro_schema_registry_url` необходимо настроить в `users.xml`, чтобы сохранить значение после перезапуска. Также можно использовать настройку `format_avro_schema_registry_url` табличного движка `Kafka`. +::: ## Parquet {#data-format-parquet} [Apache Parquet](https://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи. @@ -1353,8 +1354,6 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). - Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: ``` bash @@ -1413,8 +1412,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). - ### Вывод данных {#selecting-data-arrow} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида: @@ -1471,8 +1468,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). - ### Вывод данных {#selecting-data-2} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида: diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index b8c5ee77f0c..ae3f100a6e8 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -50,7 +50,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} 1 ``` @@ -173,9 +173,10 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- Для отправки сжатого запроса `POST` добавьте заголовок `Content-Encoding: compression_method`. 
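For example, a gzip-compressed query body can be posted like this (a minimal sketch assuming a local server on the default HTTP port 8123):

```bash
# Compress the request body and tell the server how it is encoded.
echo 'SELECT 1' | gzip -c | \
    curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```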
Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level). - :::note "Примечание" - Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия. - ::: +:::note Примечание +Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия. +::: + **Примеры** ``` bash @@ -266,9 +267,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Пример последовательности заголовков: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"} ``` Возможные поля заголовка: @@ -425,9 +426,9 @@ $ curl -v 'http://localhost:8123/predefined_query' В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_final_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены. - :::note "Предупреждение" - Чтобы сохранить стандартные `handlers` такие как `query`, `play`, `ping`, используйте правило ``. - ::: +:::note Предупреждение +Чтобы сохранить стандартные `handlers` такие как `query`, `play`, `ping`, используйте правило ``. +::: Пример: ``` xml @@ -455,9 +456,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost: max_final_threads 2 ``` - :::note "Предупреждение" - В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`. - ::: +:::note Предупреждение +В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`. +::: ### dynamic_query_handler {#dynamic_query_handler} В `dynamic_query_handler`, запрос пишется в виде параметров HTTP-запроса. Разница в том, что в `predefined_query_handler`, запрос записывается в конфигурационный файл. Вы можете настроить `query_param_name` в `dynamic_query_handler`. 
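A minimal sketch of calling such a handler; it assumes an `http_handlers` rule of type `dynamic_query_handler` with `query_param_name` set to `query_param` that matches the URL below (the path and the parameter name are illustrative, not taken from this diff):

```bash
# The query text is passed in the HTTP parameter named by query_param_name.
curl 'http://localhost:8123/dyn?query_param=SELECT%201'
```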
@@ -529,7 +530,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -569,7 +570,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -621,7 +622,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -640,7 +641,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index a4659e9ac4e..08961e8dd2e 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -6,8 +6,8 @@ sidebar_label: "Клиентские библиотеки от сторонни # Клиентские библиотеки от сторонних разработчиков {#klientskie-biblioteki-ot-storonnikh-razrabotchikov} -:::danger "Disclaimer" - Яндекс не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. +:::danger Предупреждение +ClickHouse Inc. не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. ::: - Python: diff --git a/docs/ru/interfaces/third-party/index.md b/docs/ru/interfaces/third-party/index.md index 45f29d781b2..c0beaf14340 100644 --- a/docs/ru/interfaces/third-party/index.md +++ b/docs/ru/interfaces/third-party/index.md @@ -13,6 +13,6 @@ sidebar_position: 24 - [GUI](../../interfaces/third-party/gui.md) - [Proxies](../../interfaces/third-party/proxy.md) - :::note "Примечание" - С ClickHouse работают также универсальные инструменты, поддерживающие общий API, такие как [ODBC](../../interfaces/odbc.md) или [JDBC](../../interfaces/jdbc.md). +:::note Примечание +С ClickHouse работают также универсальные инструменты, поддерживающие общий API, такие как [ODBC](../../interfaces/odbc.md) или [JDBC](../../interfaces/jdbc.md). ::: \ No newline at end of file diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 86e0dc15aac..3f7ded897ea 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -6,8 +6,9 @@ sidebar_label: "Библиотеки для интеграции от сторо # Библиотеки для интеграции от сторонних разработчиков {#biblioteki-dlia-integratsii-ot-storonnikh-razrabotchikov} -:::danger "Disclaimer" - ClickHouse, Inc. не занимается поддержкой перечисленных ниже инструментов и библиотек и не проводит тщательного тестирования для проверки их качества. +:::danger Предупреждение +ClickHouse Inc. не занимается поддержкой перечисленных ниже инструментов и библиотек и не проводит тщательного тестирования для проверки их качества. 
+::: ## Инфраструктурные продукты {#infrastrukturnye-produkty} diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index f268165f10e..0793e49edac 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -26,9 +26,9 @@ ClickHouse поддерживает управление доступом на Рекомендуется использовать SQL-воркфлоу. Оба метода конфигурации работают одновременно, поэтому, если для управления доступом вы используете конфигурационные файлы, вы можете плавно перейти на SQL-воркфлоу. - :::note "Внимание" - Нельзя одновременно использовать оба метода для управления одним и тем же объектом системы доступа. - ::: +:::note Внимание +Нельзя одновременно использовать оба метода для управления одним и тем же объектом системы доступа. +::: Чтобы посмотреть список всех пользователей, ролей, профилей и пр., а также все привилегии, используйте запрос [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement). ## Использование {#access-control-usage} diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 7eaca62fd69..9ff13bbc8a6 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -12,9 +12,10 @@ sidebar_label: "Резервное копирование данных" Каждая компания имеет различные доступные ресурсы и бизнес-требования, поэтому нет универсального решения для резервного копирования и восстановления ClickHouse, которое будет подходить в каждой ситуации. То, что работает для одного гигабайта данных, скорее всего, не будет работать для десятков петабайт. Существует множество возможных подходов со своими плюсами и минусами, которые будут рассмотрены ниже. Рекомендуется использовать несколько подходов вместо одного, чтобы компенсировать их различные недостатки. - :::note "Примечание" - Имейте в виду, что если вы создали резервную копию чего-то и никогда не пытались восстановить её, скорее всего, восстановление не будет работать должным образом, когда вам это действительно понадобится (или, по крайней мере, это займет больше времени, чем будет приемлемо для бизнеса). Поэтому, какой бы подход к резервному копированию вы ни выбрали, обязательно автоматизируйте процесс восстановления и регулярно запускайте его на резервном кластере ClickHouse. - ::: +:::note Примечание +Имейте в виду, что если вы создали резервную копию чего-то и никогда не пытались восстановить её, скорее всего, восстановление не будет работать должным образом, когда вам это действительно понадобится (или, по крайней мере, это займет больше времени, чем будет приемлемо для бизнеса). Поэтому, какой бы подход к резервному копированию вы ни выбрали, обязательно автоматизируйте процесс восстановления и регулярно запускайте его на резервном кластере ClickHouse. +::: + ## Дублирование данных {#dublirovanie-dannykh} Часто данные, которые поступают в ClickHouse, доставляются через некоторую отказоустойчивую очередь, например [Apache Kafka](https://kafka.apache.org). В этом случае можно настроить дополнительный набор подписчиков, которые будут считывать один и тот же поток данных во время записи в ClickHouse и хранить его в холодном хранилище. Большинство компаний уже имеют некоторые рекомендуемые по умолчанию холодные хранилища, которые могут быть хранилищем объектов или распределенной файловой системой, например [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html). 
diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 2b824ce91bd..3b037521692 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -85,6 +85,58 @@ $ cat /etc/clickhouse-server/users.d/alice.xml Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера. +## Шифрование и скрытие {#encryption} + +Вы можете использовать симметричное шифрование, чтобы зашифровать элемент конфигурации, например, поле password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте атрибут `encrypted_by` со значением, равным имени кодека шифрования, к элементу, который надо зашифровать. + +В отличие от атрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выполняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе. + +Пример: + +```xml + + + + + 00112233445566778899aabbccddeeff + + + + + admin + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + + + +``` + +Чтобы получить зашифрованное значение, можно использовать приложение-пример `encrypt_decrypt`. + +Пример: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + +Даже с применённым шифрованием элементы в файле предобработки все равно сохраняются в незашифрованном виде. Если это является проблемой, мы предлагаем две альтернативы: либо установить разрешения 600 на файл предобработки, либо использовать атрибут `hide_in_preprocessed`. + +Пример: + +```xml + + + + admin + secret + + + +``` + ## Примеры записи конфигурации на YAML {#example} Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example). diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md index 4641f15cb56..4c8a7cae60c 100644 --- a/docs/ru/operations/external-authenticators/kerberos.md +++ b/docs/ru/operations/external-authenticators/kerberos.md @@ -8,8 +8,9 @@ ClickHouse предоставляет возможность аутентифи В настоящее время возможно использование Kerberos только как внешнего аутентификатора, то есть для аутентификации уже существующих пользователей с помощью Kerberos. Пользователи, настроенные для Kerberos-аутентификации, могут работать с ClickHouse только через HTTP-интерфейс, причём сами клиенты должны иметь возможность аутентификации с использованием механизма GSS-SPNEGO. - ::: Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse. - ::: +:::note Примечание +Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse.
+::: ## Настройка Kerberos в ClickHouse {#enabling-kerberos-in-clickhouse} Для того, чтобы задействовать Kerberos-аутентификацию в ClickHouse, в первую очередь необходимо добавить одну-единственную секцию `kerberos` в `config.xml`. @@ -56,12 +57,13 @@ ClickHouse предоставляет возможность аутентифи ``` -:::danger "Важно" - В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае, аутентификация с помощью Kerberos будет недоступна для всех пользователей. - -:::danger "Важно" - В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае, аутентификация с помощью Kerberos будет отключена для всех пользователей. +:::danger Важно +В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае, аутентификация с помощью Kerberos будет недоступна для всех пользователей. +::: +:::danger Важно +В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае, аутентификация с помощью Kerberos будет отключена для всех пользователей. +::: ## Аутентификация пользователей с помощью Kerberos {#kerberos-as-an-external-authenticator-for-existing-users} @@ -100,11 +102,12 @@ ClickHouse предоставляет возможность аутентифи ``` -:::danger "Важно" - Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу. +:::danger Важно +Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу. +::: -:::info "" - Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`. +:::info Примечаниие +Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`. ::: ### Настройка Kerberos через SQL {#enabling-kerberos-using-sql} diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md index ba6b47116ad..48ee7c9f15d 100644 --- a/docs/ru/operations/named-collections.md +++ b/docs/ru/operations/named-collections.md @@ -88,7 +88,6 @@ SELECT * FROM s3_engine_table LIMIT 3; 3306 test 8 - 1 1 diff --git a/docs/ru/operations/opentelemetry.md b/docs/ru/operations/opentelemetry.md index 4e127e9e0f0..af6d3eef205 100644 --- a/docs/ru/operations/opentelemetry.md +++ b/docs/ru/operations/opentelemetry.md @@ -8,7 +8,7 @@ sidebar_label: Поддержка OpenTelemetry ClickHouse поддерживает [OpenTelemetry](https://opentelemetry.io/) — открытый стандарт для сбора трассировок и метрик из распределенного приложения. -:::danger "Предупреждение" +:::danger Предупреждение Поддержка стандарта экспериментальная и будет со временем меняться. 
::: diff --git a/docs/ru/operations/optimizing-performance/profile-guided-optimization.md b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 00000000000..31cb656bd99 --- /dev/null +++ b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 421df3fe3eb..2c7f0b773e8 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -24,8 +24,8 @@ ClickHouse перезагружает встроенные словари с з Настройки компрессии данных. -:::danger "Внимание" - Лучше не использовать, если вы только начали работать с ClickHouse. +:::danger Внимание +Лучше не использовать, если вы только начали работать с ClickHouse. ::: Общий вид конфигурации: @@ -89,9 +89,10 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ``` - :::note "Примечание" - Хранение ключей в конфигурационном файле не рекомендовано. Это не безопасно. Вы можете переместить ключи в отдельный файл на секретном диске и сделать symlink к этому конфигурационному файлу в папке `config.d/`. - ::: +:::note Примечание +Хранение ключей в конфигурационном файле не рекомендовано. Это не безопасно. Вы можете переместить ключи в отдельный файл на секретном диске и сделать symlink к этому конфигурационному файлу в папке `config.d/`. +::: + Загрузка из файла конфигурации, когда ключ представлен в шестнадцатеричной форме: ```xml @@ -174,8 +175,8 @@ ClickHouse проверяет условия для `min_part_size` и `min_part Значение по умолчанию: `1073741824` (1 ГБ). -:::info "Примечание" - Жесткое ограничение настраивается с помощью системных инструментов. +:::info Примечание +Жесткое ограничение настраивается с помощью системных инструментов. ::: **Пример** @@ -475,12 +476,13 @@ ClickHouse проверяет условия для `min_part_size` и `min_part По умолчанию, если секция `interserver_http_credentials` не задана в конфигурации, аутентификация при репликации не используется. - :::note "Примечание" - Настройки `interserver_http_credentials` не относятся к [конфигурации](../../interfaces/cli.md#configuration_files) учетных данных клиента ClickHouse. - ::: - :::note "Примечание" - Учетные данные в `interserver_http_credentials` являются общими для репликации по `HTTP` и `HTTPS`. - ::: +:::note Примечание +Настройки `interserver_http_credentials` не относятся к [конфигурации](../../interfaces/cli.md#configuration_files) учетных данных клиента ClickHouse. +::: +:::note Примечание +Учетные данные в `interserver_http_credentials` являются общими для репликации по `HTTP` и `HTTPS`. +::: + Раздел содержит следующие параметры: - `user` — имя пользователя. @@ -757,8 +759,8 @@ ClickHouse поддерживает динамическое изменение Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). 
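Hypothetically, this server-level limit could be set via a `config.d` override like the sketch below (the element name `max_concurrent_queries` and the value 200 are assumptions for illustration, not taken from this diff):

```bash
# Drop-in config fragment; the server picks it up from config.d on reload.
cat > /etc/clickhouse-server/config.d/max_concurrent_queries.xml <<'EOF'
<clickhouse>
    <max_concurrent_queries>200</max_concurrent_queries>
</clickhouse>
EOF
```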
-:::info "Примечание" - Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. +:::info Примечание +Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. ::: Возможные значения: @@ -778,8 +780,8 @@ ClickHouse поддерживает динамическое изменение Определяет максимальное количество одновременных `INSERT` запросов. -:::info "Примечание" - Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. +:::info Примечание +Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. ::: Возможные значения: @@ -799,8 +801,8 @@ ClickHouse поддерживает динамическое изменение Определяет максимальное количество одновременных `SELECT` запросов. -:::info "Примечание" - Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. +:::info Примечание +Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. ::: Возможные значения: @@ -1058,6 +1060,10 @@ ClickHouse использует потоки из глобального пул metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1102,7 +1108,7 @@ ClickHouse использует потоки из глобального пул - verificationMode - Способ проверки сертификатов узла. Подробности находятся в описании класса [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h). Допустимые значения: `none`, `relaxed`, `strict`, `once`. - verificationDepth - Максимальная длина верификационной цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение. - loadDefaultCAFile - Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: `true`, `false`. \| -- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions - Включение/выключение кеширования сессии. Использовать обязательно вместе с `sessionIdContext`. Допустимые значения: `true`, `false`. - sessionIdContext - Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору. Длина строки не должна превышать `SSL_MAX_SSL_SESSION_ID_LENGTH`. Рекомендуется к использованию всегда, поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию, так и если клиент затребовал кеширование. По умолчанию `${application.name}`. - sessionCacheSize - Максимальное количество сессий, которые кэширует сервер. По умолчанию - 1024\*20. 0 - неограниченное количество сессий. @@ -1155,12 +1161,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. - +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ``` xml @@ -1169,6 +1182,10 @@ ClickHouse использует потоки из глобального пул part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1176,8 +1193,8 @@ ClickHouse использует потоки из глобального пул Путь к каталогу с данными. -:::danger "Обратите внимание" - Завершающий слеш обязателен. +:::danger Обратите внимание +Завершающий слеш обязателен. ::: **Пример** @@ -1218,11 +1235,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1234,6 +1259,10 @@ ClickHouse использует потоки из глобального пул query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1245,11 +1274,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1261,6 +1298,10 @@ ClickHouse использует потоки из глобального пул query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1272,11 +1313,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` – имя базы данных. -- `table` – имя системной таблицы, где будут логироваться запросы. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. -- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1288,6 +1337,10 @@ ClickHouse использует потоки из глобального пул query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1297,12 +1350,20 @@ ClickHouse использует потоки из глобального пул Параметры: -- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. -- `database` — имя базы данных для хранения таблицы. -- `table` — имя таблицы, куда будут записываться текстовые сообщения. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ```xml @@ -1312,6 +1373,10 @@ ClickHouse использует потоки из глобального пул system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day @@ -1323,13 +1388,21 @@ ClickHouse использует потоки из глобального пул Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. -Parameters: +Параметры: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. По умолчанию файл настроек сервера `config.xml` содержит следующие настройки: @@ -1339,9 +1412,125 @@ Parameters: trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 ``` +## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log} + +Настройки для asynchronous_insert_log Система для логирования ассинхронных вставок. + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. + +**Пример** + +```xml + + + system + asynchronous_insert_log
+ 7500 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + +
+
+``` + +## crash_log {#server_configuration_parameters-crash_log} + +Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md). + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: true. + +**Пример** + +``` xml + + system + crash_log
+ toYYYYMM(event_date) + 7500 + 1024 + 1024 + 512 + true +
+``` + +## backup_log {#server_configuration_parameters-backup_log} + +Настройки для системной таблицы [backup_log](../../operations/system-tables/backup_log.md), предназначенной для логирования операций `BACKUP` и `RESTORE`. + +Параметры: + +- `database` — имя базы данных. +- `table` — имя таблицы. +- `partition_by` — [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать одновременно с `engine`. +- `order_by` - [произвольный ключ сортировки](../../engines/table-engines/mergetree-family/mergetree.md#order_by). Нельзя использовать одновременно с `engine`. +- `engine` - [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). Нельзя использовать с `partition_by` или `order_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_rows_flush_threshold` – количество строк в логе, при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size_rows / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. +- `storage_policy` – название политики хранения (необязательный параметр). +- `settings` - [дополнительные настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#settings) (необязательный параметр). + +**Пример** + +```xml + + + system + backup_log
+ 1000 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false + +
+
+``` + ## query_masking_rules {#query-masking-rules} Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере, @@ -1447,8 +1636,8 @@ TCP порт для защищённого обмена данными с кли Путь ко временным данным для обработки больших запросов. -:::danger "Обратите внимание" - Завершающий слеш обязателен. +:::danger Обратите внимание +Завершающий слеш обязателен. ::: **Пример** @@ -1463,7 +1652,7 @@ TCP порт для защищённого обмена данными с кли Если политика не задана, используется [tmp_path](#tmp-path). В противном случае `tmp_path` игнорируется. -:::note "Примечание" +:::note Примечание - `move_factor` игнорируется. - `keep_free_space_bytes` игнорируется. - `max_data_part_size_bytes` игнорируется. @@ -1604,11 +1793,11 @@ ClickHouse использует ZooKeeper для хранения метадан Если `use_minimalistic_part_header_in_zookeeper = 1`, то [реплицированные](../../operations/server-configuration-parameters/settings.md) таблицы хранят заголовки кусков данных в компактном виде, используя только одну `znode`. Если таблица содержит много столбцов, этот метод хранения значительно уменьшает объём данных, хранящихся в Zookeeper. - :::note "Внимание" - После того как вы установили `use_minimalistic_part_header_in_zookeeper = 1`, невозможно откатить ClickHouse до версии, которая не поддерживает этот параметр. Будьте осторожны при обновлении ClickHouse на серверах в кластере. Не обновляйте все серверы сразу. Безопаснее проверять новые версии ClickHouse в тестовой среде или только на некоторых серверах кластера. +:::note Внимание +После того как вы установили `use_minimalistic_part_header_in_zookeeper = 1`, невозможно откатить ClickHouse до версии, которая не поддерживает этот параметр. Будьте осторожны при обновлении ClickHouse на серверах в кластере. Не обновляйте все серверы сразу. Безопаснее проверять новые версии ClickHouse в тестовой среде или только на некоторых серверах кластера. - Заголовки частей данных, ранее сохранённые с этим параметром, не могут быть восстановлены в их предыдущем (некомпактном) представлении. - ::: +Заголовки частей данных, ранее сохранённые с этим параметром, не могут быть восстановлены в их предыдущем (некомпактном) представлении. +::: **Значение по умолчанию**: 0. ## disable_internal_dns_cache {#server-settings-disable-internal-dns-cache} @@ -1777,3 +1966,19 @@ ClickHouse использует ZooKeeper для хранения метадан - Положительное целое число. Значение по умолчанию: `10000`. + +## validate_tcp_client_information {#validate-tcp-client-information} + +Включена ли валидация данных о клиенте при запросе от клиента, использующего TCP соединение. + +Если `true`, то на неверные данные от клиента будет выброшено исключение. + +Если `false`, то данные не будут валидироваться. Сервер будет работать с клиентами всех версий. + +Значение по умолчанию: `false`. + +**Пример** + +``` xml +true +``` diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index de9bb969085..fb3b18bd46a 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -311,6 +311,52 @@ FORMAT Null; **Подробности** -При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом: +При вставке данных ClickHouse проверяет количество партиций во вставляемом блоке. 
Если количество разделов превышает число `max_partitions_per_insert_block`, ClickHouse либо логирует предупреждение, либо выбрасывает исключение в зависимости от значения `throw_on_max_partitions_per_insert_block`. Исключения имеют следующий текст: -> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).» +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} + +Позволяет контролировать поведение при достижении `max_partitions_per_insert_block` + +- `true` - Когда вставляемый блок достигает `max_partitions_per_insert_block`, возникает исключение. +- `false` - Записывает предупреждение при достижении `max_partitions_per_insert_block`. + +Значение по умолчанию: `true` + +## max_sessions_for_user {#max-sessions-per-user} + +Максимальное количество одновременных сессий на одного аутентифицированного пользователя. + +Пример: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_profile + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Значение по умолчанию: 0 (неограниченное количество сессий). diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md index ba2cb9a601f..a8374c345c0 100644 --- a/docs/ru/operations/settings/settings-profiles.md +++ b/docs/ru/operations/settings/settings-profiles.md @@ -8,9 +8,9 @@ sidebar_label: "Профили настроек" Профиль настроек — это набор настроек, сгруппированных под одним именем. - :::note "Информация" - Для управления профилями настроек рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse. - ::: +:::note Информация +Для управления профилями настроек рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse. +::: Название профиля может быть любым. Вы можете указать один и тот же профиль для разных пользователей. Самое важное, что можно прописать в профиле — `readonly=1`, это обеспечит доступ только на чтение. 
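To make the `readonly=1` point concrete, a hypothetical `users.d` override defining such a profile might look like the following sketch (the file name and profile name are illustrative):

```bash
# Drop-in users.d fragment adding a read-only settings profile.
cat > /etc/clickhouse-server/users.d/readonly_profile.xml <<'EOF'
<clickhouse>
    <profiles>
        <readonly_profile>
            <readonly>1</readonly>
        </readonly_profile>
    </profiles>
</clickhouse>
EOF
```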
@@ -39,7 +39,7 @@ SET profile = 'web' 8 - + 1000000000 100000000000 @@ -67,6 +67,7 @@ SET profile = 'web' 50 100 + 4 1 diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 50f4eb5ae6b..ecdd4f85974 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -8,9 +8,10 @@ sidebar_label: "Настройки пользователей" Раздел `users` конфигурационного файла `user.xml` содержит настройки для пользователей. - :::note "Информация" - Для управления пользователями рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse. - ::: +:::note Информация +Для управления пользователями рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse. +::: + Структура раздела `users`: ``` xml @@ -146,8 +147,9 @@ sidebar_label: "Настройки пользователей" ::/0 ``` -:::danger "Внимание" - Открывать доступ из любой сети небезопасно, если у вас нет правильно настроенного брандмауэра или сервер не отключен от интернета. +:::danger Внимание +Открывать доступ из любой сети небезопасно, если у вас нет правильно настроенного брандмауэра или сервер не отключен от интернета. +::: Чтобы открыть только локальный доступ, укажите: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 957a917c780..c58b3ae8107 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение. -## input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_orc_import_nested {#input_format_orc_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. 
- ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md). @@ -374,9 +341,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( - [CSV](../../interfaces/formats.md#csv) - [TabSeparated](../../interfaces/formats.md#tabseparated) - :::note "Примечание" - Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность. - ::: +:::note Примечание +Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность. +::: + Возможные значения: - 0 — выключена. @@ -656,8 +624,8 @@ ClickHouse может парсить только базовый формат `Y Изменяет поведение операций, выполняемых со строгостью `ANY`. -:::warning "Внимание" - Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../../engines/table-engines/special/join.md). +:::warning Внимание +Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../../engines/table-engines/special/join.md). ::: Возможные значения: @@ -738,9 +706,9 @@ ClickHouse может парсить только базовый формат `Y Включает устаревшее поведение сервера ClickHouse при выполнении операций `ANY INNER|LEFT JOIN`. - :::note "Внимание" - Используйте этот параметр только в целях обратной совместимости, если ваши варианты использования требуют устаревшего поведения `JOIN`. - ::: +:::note Внимание +Используйте этот параметр только в целях обратной совместимости, если ваши варианты использования требуют устаревшего поведения `JOIN`. +::: Когда включено устаревшее поведение: - Результаты операций "t1 ANY LEFT JOIN t2" и "t2 ANY RIGHT JOIN t1" не равны, поскольку ClickHouse использует логику с сопоставлением ключей таблицы "многие к одному слева направо". @@ -1102,9 +1070,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test' Максимальный размер блоков несжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативной памяти. - :::note "Предупреждение" - Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse. - ::: +:::note Предупреждение +Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse. +::: Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы). ## min_compress_block_size {#min-compress-block-size} @@ -1119,9 +1087,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test' Пусть мы записываем столбец URL типа String (средний размер - 60 байт на значение). При записи 8192 строк, будет, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 строк, то сжатый блок будет сформирован на каждую засечку. В этом случае, при чтении с диска данных из диапазона в одну засечку, не будет разжато лишних данных. 
- :::note "Предупреждение" - Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse. - ::: +:::note Предупреждение +Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse. +::: ## max_query_size {#settings-max_query_size} @@ -1209,9 +1177,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test' Может быть использована для ограничения скорости сети при репликации данных для добавления или замены новых узлов. - :::note - 60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8). - ::: +:::note Примечание +60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8). +::: ## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server} @@ -1230,9 +1198,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test' Может быть использована для ограничения скорости сети при репликации данных для добавления или замены новых узлов. - :::note - 60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8). - ::: +:::note Примечание +60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8). +::: ## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms} @@ -1399,8 +1367,9 @@ load_balancing = round_robin Значение по умолчанию: 1. -:::danger "Warning" - Отключайте эту настройку при использовании [max_parallel_replicas](#settings-max_parallel_replicas). +:::danger Предупреждение +Отключайте эту настройку при использовании [max_parallel_replicas](#settings-max_parallel_replicas). +::: ## totals_mode {#totals-mode} @@ -1429,8 +1398,9 @@ load_balancing = round_robin - Ключ сэмплирования является выражением, которое сложно вычисляется. - У распределения сетевых задержек в кластере длинный «хвост», из-за чего при параллельных запросах к нескольким серверам увеличивается среднее время задержки. -:::danger "Предупреждение" - Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries). +:::danger Предупреждение +Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries). +::: ## compile_expressions {#compile-expressions} @@ -2203,8 +2173,8 @@ SELECT * FROM test_table Устанавливает приоритет ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) для потоков, исполняющих запросы. Планировщик ОС учитывает эти приоритеты при выборе следующего потока для исполнения на доступном ядре CPU. -:::warning "Предупреждение" - Для использования этой настройки необходимо установить свойство `CAP_SYS_NICE`. Пакет `clickhouse-server` устанавливает его во время инсталляции. Некоторые виртуальные окружения не позволяют установить `CAP_SYS_NICE`. В этом случае, `clickhouse-server` выводит сообщение при запуске. +:::warning Предупреждение +Для использования этой настройки необходимо установить свойство `CAP_SYS_NICE`. Пакет `clickhouse-server` устанавливает его во время инсталляции. 
Некоторые виртуальные окружения не позволяют установить `CAP_SYS_NICE`. В этом случае, `clickhouse-server` выводит сообщение при запуске. ::: Допустимые значения: @@ -4213,3 +4183,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi - Запрос: `SELECT * FROM file('sample.csv')` Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`. + +## precise_float_parsing {#precise_float_parsing} + +Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md): +* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению. +* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться на 1 или 2 наименее значимых цифры. + +Возможные значения: `0`, `1`. + +Значение по умолчанию: `0`. + +Пример: + +```sql +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7090999999999998 │ 15008753.000000002 │ +└─────────────────────┴──────────────────────────┘ + +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7091 │ 15008753 │ +└─────────────────────┴──────────────────────────┘ +``` diff --git a/docs/ru/operations/system-tables/backup_log.md b/docs/ru/operations/system-tables/backup_log.md new file mode 100644 index 00000000000..15c1bfb20a4 --- /dev/null +++ b/docs/ru/operations/system-tables/backup_log.md @@ -0,0 +1,145 @@ +--- +slug: /ru/operations/system-tables/backup_log +--- +# system.backup_log {#system_tables-backup-log} + +Содержит информацию о всех операциях `BACKUP` and `RESTORE`. + +Колонки: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Дата события. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Время события с точностью до микросекунд. +- `id` ([String](../../sql-reference/data-types/string.md)) — Идентификатор операции. +- `name` ([String](../../sql-reference/data-types/string.md)) — Название хранилища (содержимое секции `FROM` или `TO` в SQL запросе). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Статус операции. Возможные значения: + - `'CREATING_BACKUP'` + - `'BACKUP_CREATED'` + - `'BACKUP_FAILED'` + - `'RESTORING'` + - `'RESTORED'` + - `'RESTORE_FAILED'` +- `error` ([String](../../sql-reference/data-types/string.md)) — Сообщение об ошибке, при наличии (записи для успешных операций содержат пустую строку). +- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время начала операции. +- `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время завершения операции. +- `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество файлов, хранимых в бэкапе. +- `total_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общий размер файлов, хранимых в бэкапе. +- `num_entries` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество позиций в бэкапе, т.е. либо количество файлов в папке (если бэкап хранится в папке), либо количество файлов в архиве (если бэкап хранится в архиве). 
Это значение не равно `num_files` в случае если это инкрементальный бэкап либо он содержит пустые файлы или дубликаты. Следующее утверждение верно всегда: `num_entries <= num_files`. +- `uncompressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Размер бэкапа до сжатия. +- `compressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Размер бэкапа после сжатия. Если бэкап не хранится в виде архива, это значение равно `uncompressed_size`. +- `files_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество файлов, прочитанных во время операции восстановления. +- `bytes_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общий размер файлов, прочитанных во время операции восстановления. + +**Пример** + +```sql +BACKUP TABLE test_db.my_table TO Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status─────────┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ BACKUP_CREATED │ +└──────────────────────────────────────┴────────────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:05:21.998566 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: CREATING_BACKUP +error: +start_time: 2023-08-19 11:05:21 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:08:56.916192 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: BACKUP_CREATED +error: +start_time: 2023-08-19 11:05:21 +end_time: 2023-08-19 11:08:56 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 3525068304 +files_read: 0 +bytes_read: 0 +``` +```sql +RESTORE TABLE test_db.my_table FROM Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status───┐ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ RESTORED │ +└──────────────────────────────────────┴──────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:19.718077 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORING +error: +start_time: 2023-08-19 11:09:19 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:29.334234 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORED +error: +start_time: 2023-08-19 11:09:19 +end_time: 2023-08-19 11:09:29 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 4290362365 +files_read: 57 +bytes_read: 4290364870 +``` + +Это по сути та же информация, что заносится и в системную таблицу `system.backups`: + +```sql +SELECT * FROM system.backups ORDER BY start_time +``` +```response 
+┌─id───────────────────────────────────┬─name──────────────────────────┬─status─────────┬─error─┬──────────start_time─┬────────────end_time─┬─num_files─┬─total_size─┬─num_entries─┬─uncompressed_size─┬─compressed_size─┬─files_read─┬─bytes_read─┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ Disk('backups_disk', '1.zip') │ BACKUP_CREATED │ │ 2023-08-19 11:05:21 │ 2023-08-19 11:08:56 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 3525068304 │ 0 │ 0 │ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ Disk('backups_disk', '1.zip') │ RESTORED │ │ 2023-08-19 11:09:19 │ 2023-08-19 11:09:29 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 4290362365 │ 57 │ 4290364870 │ +└──────────────────────────────────────┴───────────────────────────────┴────────────────┴───────┴─────────────────────┴─────────────────────┴───────────┴────────────┴─────────────┴───────────────────┴─────────────────┴────────────┴────────────┘ +``` + +**См. также** + +- [Backup and Restore](../../operations/backup.md) diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 7ff368b1910..aedefb24193 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -21,7 +21,7 @@ sidebar_label: "Системные таблицы" Большинство системных таблиц хранят свои данные в оперативной памяти. Сервер ClickHouse создает эти системные таблицы при старте. -В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) и [text_log](../../operations/system-tables/text_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую. +В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md), [text_log](../../operations/system-tables/text_log.md) и [backup_log](../../operations/system-tables/backup_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую. Таблицы с системными логами `log` можно настроить, создав конфигурационный файл с тем же именем, что и таблица в разделе `/etc/clickhouse-server/config.d/`, или указав соответствующие элементы в `/etc/clickhouse-server/config.xml`. 
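As a hedged aside on the `system.backup_log` table introduced above, a typical troubleshooting query might look like the sketch below; it uses only the columns and status values documented in this changeset.

```sql
-- List recent failed BACKUP/RESTORE operations together with their error messages.
SELECT id, name, status, error, start_time, end_time
FROM system.backup_log
WHERE status IN ('BACKUP_FAILED', 'RESTORE_FAILED')
ORDER BY event_time_microseconds DESC
LIMIT 10;
```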
Настраиваться могут следующие элементы: @@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы" ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/docs/ru/operations/system-tables/licenses.md b/docs/ru/operations/system-tables/licenses.md index 13407a27c94..e35b5d5e3be 100644 --- a/docs/ru/operations/system-tables/licenses.md +++ b/docs/ru/operations/system-tables/licenses.md @@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` text ┌─library_name───────┬─license_type─┬─license_path────────────────────────┐ -│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │ -│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │ -│ avro │ Apache │ /contrib/avro/LICENSE.txt │ │ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │ -│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │ -│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │ -│ aws │ Apache │ /contrib/aws/LICENSE.txt │ -│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │ -│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │ +│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │ │ brotli │ MIT │ /contrib/brotli/LICENSE │ -│ capnproto │ MIT │ /contrib/capnproto/LICENSE │ -│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │ -│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │ -│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │ -│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │ +│ [...] │ [...] │ [...] │ └────────────────────┴──────────────┴─────────────────────────────────────┘ ``` diff --git a/docs/ru/operations/system-tables/mutations.md b/docs/ru/operations/system-tables/mutations.md index 7a9f4ecc0da..bbf22952c2f 100644 --- a/docs/ru/operations/system-tables/mutations.md +++ b/docs/ru/operations/system-tables/mutations.md @@ -31,8 +31,8 @@ slug: /ru/operations/system-tables/mutations - `1` — мутация завершена, - `0` — мутация еще продолжается. -:::info "Замечание" - Даже если `parts_to_do = 0`, для реплицированной таблицы возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся операции `INSERT`, которая добавляет данные, которые нужно будет мутировать. +:::info Замечание +Даже если `parts_to_do = 0`, для реплицированной таблицы возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся операции `INSERT`, которая добавляет данные, которые нужно будет мутировать. ::: Если во время мутации какого-либо куска возникли проблемы, заполняются следующие столбцы: diff --git a/docs/ru/operations/system-tables/parts.md b/docs/ru/operations/system-tables/parts.md index 66c3fe3095e..74984591dd2 100644 --- a/docs/ru/operations/system-tables/parts.md +++ b/docs/ru/operations/system-tables/parts.md @@ -99,9 +99,10 @@ slug: /ru/operations/system-tables/parts - `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Массив выражений. Каждое выражение задаёт правило [TTL MOVE](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). - :::note "Предупреждение" - Массив выражений `move_ttl_info.expression` используется, в основном, для обратной совместимости. Для работы с правилами `TTL MOVE` лучше использовать поля `move_ttl_info.min` и `move_ttl_info.max`. - ::: +:::note Предупреждение +Массив выражений `move_ttl_info.expression` используется, в основном, для обратной совместимости. 
Для работы с правилами `TTL MOVE` лучше использовать поля `move_ttl_info.min` и `move_ttl_info.max`. +::: + - `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Массив значений. Каждый элемент массива задаёт минимальное значение ключа даты и времени для правила [TTL MOVE](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). - `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Массив значений. Каждый элемент массива задаёт максимальное значение ключа даты и времени для правила [TTL MOVE](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md index 8f858c14fb1..8d79aa0eef7 100644 --- a/docs/ru/operations/system-tables/query_log.md +++ b/docs/ru/operations/system-tables/query_log.md @@ -5,9 +5,9 @@ slug: /ru/operations/system-tables/query_log Содержит информацию о выполняемых запросах, например, время начала обработки, продолжительность обработки, сообщения об ошибках. - :::note "Внимание" - Таблица не содержит входных данных для запросов `INSERT`. - ::: +:::note Внимание +Таблица не содержит входных данных для запросов `INSERT`. +::: Настойки логгирования можно изменить в секции серверной конфигурации [query_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Можно отключить логгирование настройкой [log_queries = 0](../settings/settings.md#settings-log-queries). По-возможности, не отключайте логгирование, поскольку информация из таблицы важна при решении проблем. @@ -99,7 +99,8 @@ ClickHouse не удаляет данные из таблица автомати - `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(#system_tables-events - `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1. - `log_comment` ([String](../../sql-reference/data-types/string.md)) — комментарий к записи в логе. Представляет собой произвольную строку, длина которой должна быть не больше, чем [max_query_size](../../operations/settings/settings.md#settings-max_query_size). Если нет комментария, то пустая строка. -- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запросов. +- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запросов, эти потоки не обязательно выполняются одновременно. +- `peak_threads_usage` ([UInt64)](../../sql-reference/data-types/int-uint.md)) — максимальное количество одновременно работавших потоков, участвоваших в обработке запроса. - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `агрегатных функций`, использованных при выполнении запроса. - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `комбинаторов агрегатных функций`, использованных при выполнении запроса. 
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `движков баз данных`, использованных при выполнении запроса. diff --git a/docs/ru/operations/update.md b/docs/ru/operations/update.md index 4ad4d8d942b..9a58a7ea5dd 100644 --- a/docs/ru/operations/update.md +++ b/docs/ru/operations/update.md @@ -16,9 +16,10 @@ $ sudo service clickhouse-server restart Если ClickHouse установлен не из рекомендуемых deb-пакетов, используйте соответствующий метод обновления. - :::note "Примечание" - Вы можете обновить сразу несколько серверов, кроме случая, когда все реплики одного шарда отключены. - ::: +:::note Примечание +Вы можете обновить сразу несколько серверов, кроме случая, когда все реплики одного шарда отключены. +::: + Обновление ClickHouse до определенной версии: **Пример** diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 61fba2dd7cc..6f0394a183d 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -12,8 +12,9 @@ sidebar_label: clickhouse-local `clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить конфигурацию сервера с помощью ключа `--config-file`. -:::danger "Warning" - Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями. +:::danger Предупреждение +Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями. +::: Для временных данных по умолчанию создается специальный каталог. @@ -110,3 +111,42 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. ├──────────┼──────────┤ ... ``` + +## Запрос данных в файле с помощью SQL {#query_data_in_file} + +Часто `clickhouse-local` используется для выполнения специальных запросов к файлам, когда не нужно вставлять данные в таблицу. `clickhouse-local` может транслировать данные из файла во временную таблицу и выполнить ваш SQL. + +Если файл находится на той же машине, что и `clickhouse-local`, то можно просто указать файл для загрузки. Следующий файл `reviews.tsv` содержит выборку отзывов о товарах Amazon: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews.tsv'" +``` + +Эта команда является сокращением команды: + +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv')" +``` + +ClickHouse знает, что файл использует формат, разделенный табуляцией, из расширения имени файла. Если необходимо явно указать формат, просто добавьте один из [множества входных форматов ClickHouse](../../interfaces/formats.md): + +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')" +``` + +Функция таблицы `file` создает таблицу, и вы можете использовать `DESCRIBE` для просмотра предполагаемой схемы: + +```bash +./clickhouse local -q "DESCRIBE file('reviews.tsv')" +``` + +:::tip +В имени файла разрешается использовать [Шаблоны поиска](/docs/ru/sql-reference/table-functions/file.md/#globs-in-path). 
+ +Примеры: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'" +./clickhouse local -q "SELECT * FROM 'review_?.csv'" +./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'" +``` diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index f7c7f98a8dd..59a9c7f8cf1 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -90,8 +90,8 @@ FROM sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -:::danger "Предупреждение" - События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции. +:::danger Предупреждение +События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции. ::: **Аргументы** @@ -176,8 +176,8 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM Вычисляет количество цепочек событий, соответствующих шаблону. Функция обнаруживает только непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий. -:::danger "Предупреждение" - События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции. +:::danger Предупреждение +События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции. ::: ``` sql diff --git a/docs/ru/sql-reference/aggregate-functions/reference/corr.md b/docs/ru/sql-reference/aggregate-functions/reference/corr.md index 7871a04a4ff..faa411db632 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/corr.md @@ -9,6 +9,6 @@ sidebar_position: 107 Вычисляет коэффициент корреляции Пирсона: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. - ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md b/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md index d1c96a9a8e3..22bf21d5d9a 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md @@ -9,6 +9,6 @@ sidebar_position: 36 Вычисляет величину `Σ((x - x̅)(y - y̅)) / n`. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. 
- ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md index b04b2c1c89d..ed7b6eda12c 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md @@ -11,6 +11,6 @@ sidebar_position: 37 Возвращает Float64. В случае, когда `n <= 1`, возвращается +∞. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. - ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md index 3816fec9dce..64f78e85c6d 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md @@ -7,8 +7,8 @@ sidebar_position: 141 Суммирует арифметическую разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована. -:::info "Примечание" - Чтобы эта функция работала должным образом, исходные данные должны быть отсортированы. В [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized) вместо нее рекомендуется использовать [deltaSumTimestamp](../../../sql-reference/aggregate-functions/reference/deltasumtimestamp.md#agg_functions-deltasumtimestamp). +:::info Примечание +Чтобы эта функция работала должным образом, исходные данные должны быть отсортированы. В [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized) вместо нее рекомендуется использовать [deltaSumTimestamp](../../../sql-reference/aggregate-functions/reference/deltasumtimestamp.md#agg_functions-deltasumtimestamp). ::: **Синтаксис** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md index dfe0ea7a4f3..f4c0b2cdd73 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -19,8 +19,8 @@ intervalLengthSum(start, end) - `start` — начальное значение интервала. 
[Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) или [Date](../../../sql-reference/data-types/date.md#data_type-date). - `end` — конечное значение интервала. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) или [Date](../../../sql-reference/data-types/date.md#data_type-date). -:::info "Примечание" - Аргументы должны быть одного типа. В противном случае ClickHouse сгенерирует исключение. +:::info Примечание +Аргументы должны быть одного типа. В противном случае ClickHouse сгенерирует исключение. ::: **Возвращаемое значение** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 21f7c833090..e538a78fc3b 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -13,9 +13,9 @@ sidebar_position: 208 Внутренние состояния функций `quantile*` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantiles](#quantiles), это повысит эффективность запроса. - :::note "Примечание" - Использование `quantileTDigestWeighted` [не рекомендуется для небольших наборов данных](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) и может привести к значительной ошибке. Рассмотрите возможность использования [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) в таких случаях. - ::: +:::note Примечание +Использование `quantileTDigestWeighted` [не рекомендуется для небольших наборов данных](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) и может привести к значительной ошибке. Рассмотрите возможность использования [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) в таких случаях. 
+::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md index 59c2ced3439..6e75dc094ef 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -37,18 +37,20 @@ quantileTiming(level)(expr) В противном случае, результат вычисления округляется до ближайшего множителя числа 16. - :::note "Примечание" - Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile). - ::: +:::note Примечание +Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile). +::: + **Возвращаемое значение** - Квантиль заданного уровня. Тип: `Float32`. - :::note "Примечания" - Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by). - ::: +:::note Примечание +Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by). +::: + **Пример** Входная таблица: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 4cce04e6d4e..839afec5a12 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -39,18 +39,19 @@ quantileTimingWeighted(level)(expr, weight) В противном случае, результат вычисления округляется до ближайшего множителя числа 16. - :::note "Примечание" - Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile). - ::: +:::note Примечание +Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile). +::: **Возвращаемое значение** - Квантиль заданного уровня. Тип: `Float32`. - :::note "Примечания" - Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by). - ::: +:::note Примечание +Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by). 
+::: + **Пример** Входная таблица: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md index bb6b43e716c..ddb424aa8a1 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md @@ -7,6 +7,6 @@ sidebar_position: 30 Результат равен квадратному корню от `varPop(x)`. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. - ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md index c8048f2d3dc..eddd3b4b32e 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -7,6 +7,6 @@ sidebar_position: 31 Результат равен квадратному корню от `varSamp(x)`. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. - ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md index c0c1ca2c512..a7dd308791c 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -35,9 +35,10 @@ uniqCombined(HLL_precision)(x[, ...]) - Результат детерминирован (не зависит от порядка выполнения запроса). - :::note "Note" - Так как используется 32-битный хэш для не-`String` типов, результат будет иметь очень очень большую ошибку для количества разичных элементов существенно больше `UINT_MAX` (ошибка быстро растёт начиная с нескольких десятков миллиардов различных значений), таким образом в этом случае нужно использовать [uniqCombined64](#agg_function-uniqcombined64) - ::: +:::note Примечание +Так как используется 32-битный хэш для не-`String` типов, результат будет иметь очень очень большую ошибку для количества разичных элементов существенно больше `UINT_MAX` (ошибка быстро растёт начиная с нескольких десятков миллиардов различных значений), таким образом в этом случае нужно использовать [uniqCombined64](#agg_function-uniqcombined64) +::: + По сравнению с функцией [uniq](#agg_function-uniq), `uniqCombined`: - Потребляет в несколько раз меньше памяти. 
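To make the `uniqCombined` note above concrete, a hedged comparison sketch follows; `numbers(1000000)` is only a small stand-in dataset, far below the cardinalities where the 32-bit hash becomes a problem.

```sql
-- Compare the approximate counters with the exact one; for cardinalities far
-- beyond UINT_MAX the 64-bit variant uniqCombined64 should be preferred.
SELECT
    uniqCombined(number)   AS approx_32bit,
    uniqCombined64(number) AS approx_64bit,
    uniqExact(number)      AS exact
FROM numbers(1000000);
```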
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/varpop.md b/docs/ru/sql-reference/aggregate-functions/reference/varpop.md index ba1719151f2..64bf703a1d8 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/varpop.md @@ -9,6 +9,6 @@ sidebar_position: 32 То есть, дисперсию для множества значений. Возвращает `Float64`. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. - ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md b/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md index d81b94d1b13..e23f95a15f6 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md @@ -11,6 +11,6 @@ sidebar_position: 33 Возвращает `Float64`. В случае, когда `n <= 1`, возвращается `+∞`. - :::note "Примечание" - Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечиват меньшую вычислительную ошибку. - ::: +:::note Примечание +Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечиват меньшую вычислительную ошибку. +::: diff --git a/docs/ru/sql-reference/data-types/nullable.md b/docs/ru/sql-reference/data-types/nullable.md index f6d6bb1f9c6..331e9cc4998 100644 --- a/docs/ru/sql-reference/data-types/nullable.md +++ b/docs/ru/sql-reference/data-types/nullable.md @@ -25,8 +25,8 @@ sidebar_label: Nullable В случае, когда маска указывает, что в ячейке хранится `NULL`, в файле значений хранится значение по умолчанию для типа данных. Т.е. если, например, поле имеет тип `Nullable(Int8)`, то ячейка будет хранить значение по умолчанию для `Int8`. Эта особенность увеличивает размер хранилища. -:::info "Info" - Почти всегда использование `Nullable` снижает производительность, учитывайте это при проектировании своих баз. +:::info Примечание +Почти всегда использование `Nullable` снижает производительность, учитывайте это при проектировании своих баз. 
::: ## Поиск NULL {#finding-null} diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 6e3660844f6..380b075d38d 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -25,11 +25,11 @@ slug: /ru/sql-reference/data-types/simpleaggregatefunction - [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) - :::note "Примечание" - Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. +:::note Примечание +Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. - `SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией. - ::: +`SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией. +::: **Параметры** - `func` — имя агрегатной функции. diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 109ceee7852..867a6665f4b 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -8,8 +8,8 @@ sidebar_label: Interval Семейство типов данных, представляющих интервалы дат и времени. Оператор [INTERVAL](../../../sql-reference/data-types/special-data-types/interval.md#operator-interval) возвращает значения этих типов. -:::danger "Внимание" - Нельзя использовать типы данных `Interval` для хранения данных в таблице. +:::danger Внимание +Нельзя использовать типы данных `Interval` для хранения данных в таблице. ::: Структура: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index c8c55ed5c96..16b5f026f67 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -434,8 +434,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) 3. Оценить потребление оперативной памяти с помощью таблицы `system.dictionaries`. 4. Увеличивать/уменьшать количество ячеек до получения требуемого расхода оперативной памяти. -:::danger "Warning" - Не используйте в качестве источника ClickHouse, поскольку он медленно обрабатывает запросы со случайным чтением. +:::danger Предупреждение +Не используйте в качестве источника ClickHouse, поскольку он медленно обрабатывает запросы со случайным чтением. 
::: ### complex_key_cache {#complex-key-cache} diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 2aaae6b7075..5bd0d9ed206 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -212,9 +212,9 @@ SOURCE(HTTP( ### Выявленная уязвимость в функционировании ODBC словарей {#vyiavlennaia-uiazvimost-v-funktsionirovanii-odbc-slovarei} - :::note "Внимание" - При соединении с базой данных через ODBC можно заменить параметр соединения `Servername`. В этом случае, значения `USERNAME` и `PASSWORD` из `odbc.ini` отправляются на удаленный сервер и могут быть скомпрометированы. - ::: +:::note Внимание +При соединении с базой данных через ODBC можно заменить параметр соединения `Servername`. В этом случае, значения `USERNAME` и `PASSWORD` из `odbc.ini` отправляются на удаленный сервер и могут быть скомпрометированы. +::: **Пример небезопасного использования** Сконфигурируем unixODBC для работы с PostgreSQL. Содержимое `/etc/odbc.ini`: @@ -463,8 +463,8 @@ SOURCE(ODBC( - `invalidate_query` — запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external-dicts-dict-lifetime.md). - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` и `query` не могут быть использованы вместе. Также обязательно должен быть один из источников данных: `table` или `query`. +:::info Примечание +Поля `table` и `query` не могут быть использованы вместе. Также обязательно должен быть один из источников данных: `table` или `query`. ::: ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных. @@ -542,9 +542,9 @@ SOURCE(MYSQL( - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. - Явный параметр `secure` отсутствует. Автоматически поддержана работа в обоих случаях: когда установка SSL-соединения необходима и когда нет. +:::info Примечание +Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. +Явный параметр `secure` отсутствует. Автоматически поддержана работа в обоих случаях: когда установка SSL-соединения необходима и когда нет. ::: MySQL можно подключить на локальном хосте через сокеты, для этого необходимо задать `host` и `socket`. @@ -634,8 +634,8 @@ SOURCE(CLICKHOUSE( - `secure` - флаг, разрешающий или не разрешающий защищённое SSL-соединение. - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. +:::info Примечание +Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. 
::: ### MongoDB {#dicts-external_dicts_dict_sources-mongodb} @@ -750,8 +750,8 @@ SOURCE(REDIS( - `max_threads` – максимальное количество тредов для загрузки данных из нескольких партиций в словарь. - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`. +:::info Примечание +Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`. ::: ### PostgreSQL {#dicts-external_dicts_dict_sources-postgresql} @@ -807,6 +807,6 @@ SOURCE(POSTGRESQL( - `invalidate_query` – запрос для проверки условия загрузки словаря. Необязательный параметр. Более подробную информацию смотрите в разделе [обновление словарей](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. +:::info Примечание +Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. ::: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index a711287ae8e..c2f943d8276 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -57,8 +57,8 @@ ClickHouse поддерживает следующие виды ключей: Структура может содержать либо `` либо ``. DDL-запрос может содержать только `PRIMARY KEY`. -:::danger "Обратите внимание" - Ключ не надо дополнительно описывать в атрибутах. +:::danger Обратите внимание +Ключ не надо дополнительно описывать в атрибутах. ::: ### Числовой ключ {#ext_dict-numeric-key} @@ -94,9 +94,10 @@ PRIMARY KEY Id Ключом может быть кортеж (`tuple`) из полей произвольных типов. В этом случае [layout](external-dicts-dict-layout.md) должен быть `complex_key_hashed` или `complex_key_cache`. -:::tip "Совет" - Составной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. - ::: +:::tip Совет +Составной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. +::: + Структура ключа задаётся в элементе ``. Поля ключа задаются в том же формате, что и [атрибуты](external-dicts-dict-structure.md) словаря. Пример: ``` xml diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md index a262a354889..af002557f6b 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -50,9 +50,9 @@ ClickHouse: Если вы создаёте внешние словари [DDL-запросами](../../statements/create/index.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера. - :::note "Внимание" - Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. 
функцию [transform](../../../sql-reference/functions/other-functions.md)). Эта функциональность не связана с внешними словарями. - ::: +:::note Внимание +Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../../../sql-reference/functions/other-functions.md)). Эта функциональность не связана с внешними словарями. +::: ## Смотрите также {#ext-dicts-see-also} - [Настройка внешнего словаря](external-dicts-dict.md) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 439eddfd752..659e2d3f75e 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -18,8 +18,8 @@ empty([x]) Массив считается пустым, если он не содержит ни одного элемента. -:::note "Примечание" - Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT empty(arr) FROM TABLE` преобразуется к запросу `SELECT arr.size0 = 0 FROM TABLE`. +:::note Примечание +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT empty(arr) FROM TABLE` преобразуется к запросу `SELECT arr.size0 = 0 FROM TABLE`. ::: Функция также поддерживает работу с типами [String](string-functions.md#empty) и [UUID](uuid-functions.md#empty). @@ -62,8 +62,8 @@ notEmpty([x]) Массив считается непустым, если он содержит хотя бы один элемент. -:::note "Примечание" - Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT notEmpty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 != 0 FROM TABLE`. +:::note Примечание +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT notEmpty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 != 0 FROM TABLE`. ::: Функция также поддерживает работу с типами [String](string-functions.md#notempty) и [UUID](uuid-functions.md#notempty). @@ -694,8 +694,8 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; └─────────┘ ``` -:::note "Примечание" - Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). 
+:::note Примечание +Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: ## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} @@ -1703,3 +1703,327 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r │ 6 │ Float64 │ └─────┴──────────────────────────────────────────────────────────────────────────────────────────┘ ``` + +## arrayRotateLeft + +Поворачивает [массив](../../sql-reference/data-types/array.md) влево на заданное число элементов. +Если количество элементов отрицательно, то массив поворачивается вправо. + +**Синтаксис** + +``` sql +arrayRotateLeft(arr, n) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно повернуть массив. + +**Возвращаемое значение** + +- Массив, повернутый на заданное число элементов влево. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res; +``` + +Результат: + +``` text +┌─res───────────────────┐ +│ ['d','e','a','b','c'] │ +└───────────────────────┘ +``` + +## arrayRotateRight + +Поворачивает [массив](../../sql-reference/data-types/array.md) вправо на заданное число элементов. +Если количество элементов отрицательно, то массив поворачивается влево. + +**Синтаксис** + +``` sql +arrayRotateRight(arr, n) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно повернуть массив. + +**Возвращаемое значение** + +- Массив, повернутый на заданное число элементов вправо. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res; +``` + +Результат: + +``` text +┌─res───────────────────┐ +│ ['c','d','e','a','b'] │ +└───────────────────────┘ +``` + +## arrayShiftLeft + +Сдвигает [массив](../../sql-reference/data-types/array.md) влево на заданное число элементов. +Новые элементы заполняются переданным аргументом или значением по умолчанию для типа элементов массива. +Если количество элементов отрицательно, то массив сдвигается вправо. + +**Синтаксис** + +``` sql +arrayShiftLeft(arr, n[, default]) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно сдвинуть массив. +- `default` — Опциональный. Значение по умолчанию для новых элементов. + +**Возвращаемое значение** + +- Массив, сдвинутый на заданное число элементов влево. + +Тип: [Массив](../../sql-reference/data-types/array.md). 
+ +**Примеры** + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res; +``` + +Результат: + +``` text +┌─res─────────────┐ +│ [3,4,5,6,42,42] │ +└─────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Результат: + +``` text +┌─res─────────────────────────────┐ +│ ['d','e','f','foo','foo','foo'] │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Результат: + +``` text +┌─res─────────────────┐ +│ [3,4,5,6,4242,4242] │ +└─────────────────────┘ +``` + +## arrayShiftRight + +Сдвигает [массив](../../sql-reference/data-types/array.md) вправо на заданное число элементов. +Новые элементы заполняются переданным аргументом или значением по умолчанию для типа элементов массива. +Если количество элементов отрицательно, то массив сдвигается влево. + +**Синтаксис** + +``` sql +arrayShiftRight(arr, n[, default]) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно сдвинуть массив. +- `default` — Опциональный. Значение по умолчанию для новых элементов. + +**Возвращаемое значение** + +- Массив, сдвинутый на заданное число элементов вправо. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res; +``` + +Результат: + +``` text +┌─res─────────────┐ +│ [42,42,1,2,3,4] │ +└─────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Результат: + +``` text +┌─res─────────────────────────────┐ +│ ['foo','foo','foo','a','b','c'] │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Результат: + +``` text +┌─res─────────────────┐ +│ [4242,4242,1,2,3,4] │ +└─────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 4db8a1ec6f8..fa5728a097d 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -359,7 +359,7 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101; Округляет дату или дату-с-временем до последнего числа месяца. Возвращается дата. -:::note "Attention" +:::note Важно Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами. 
::: diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index 675bf1cbf9f..3e962ac23c2 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md @@ -154,7 +154,7 @@ SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2); Если вы хотите преобразовать результат в число, вы можете использовать функции [reverse](../../sql-reference/functions/string-functions.md#reverse) и [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions). -:::note "Примечание" +:::note Примечание Если `unhex` вызывается из `clickhouse-client`, двоичные строки отображаются с использованием UTF-8. ::: @@ -296,10 +296,10 @@ unbin(arg) Для числового аргумента `unbin()` не возвращает значение, обратное результату `bin()`. Чтобы преобразовать результат в число, используйте функции [reverse](../../sql-reference/functions/string-functions.md#reverse) и [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264). -:::note "Примечание" +:::note Примечание Если `unbin` вызывается из клиента `clickhouse-client`, бинарная строка возвращается в кодировке UTF-8. ::: - + Поддерживает двоичные цифры `0` и `1`. Количество двоичных цифр не обязательно должно быть кратно восьми. Если строка аргумента содержит что-либо, кроме двоичных цифр, возвращается некоторый результат, определенный реализацией (ошибки не возникает). **Аргументы** diff --git a/docs/ru/sql-reference/functions/ext-dict-functions.md b/docs/ru/sql-reference/functions/ext-dict-functions.md index d14f0ddf027..047862f0617 100644 --- a/docs/ru/sql-reference/functions/ext-dict-functions.md +++ b/docs/ru/sql-reference/functions/ext-dict-functions.md @@ -4,7 +4,7 @@ sidebar_position: 58 sidebar_label: "Функции для работы с внешними словарями" --- -:::note "Внимание" +:::note Внимание Для словарей, созданных с помощью [DDL-запросов](../../sql-reference/statements/create/dictionary.md), в параметре `dict_name` указывается полное имя словаря вместе с базой данных, например: `.`. Если база данных не указана, используется текущая. ::: diff --git a/docs/ru/sql-reference/functions/geo/geohash.md b/docs/ru/sql-reference/functions/geo/geohash.md index 9e56d977bc7..3b39b1318ea 100644 --- a/docs/ru/sql-reference/functions/geo/geohash.md +++ b/docs/ru/sql-reference/functions/geo/geohash.md @@ -85,8 +85,8 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi - `latitude_max` — максимальная широта. Диапазон возможных значений: `[-90°, 90°]`. Тип данных: [Float](../../../sql-reference/data-types/float.md). - `precision` — точность geohash. Диапазон возможных значений: `[1, 12]`. Тип данных: [UInt8](../../../sql-reference/data-types/int-uint.md). -:::info "Замечание" - Все передаваемые координаты должны быть одного и того же типа: либо `Float32`, либо `Float64`. +:::info Замечание +Все передаваемые координаты должны быть одного и того же типа: либо `Float32`, либо `Float64`. ::: **Возвращаемые значения** @@ -96,8 +96,8 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi Тип данных: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). -:::info "Замечание" - Если возвращаемый массив содержит свыше 10 000 000 элементов, функция сгенерирует исключение. 
+:::info Замечание +Если возвращаемый массив содержит свыше 10 000 000 элементов, функция сгенерирует исключение. ::: **Пример** diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 26497ef21d3..2b74e9d776c 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -8,8 +8,8 @@ sidebar_label: "Функции интроспекции" Функции из этого раздела могут использоваться для интроспекции [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) и [DWARF](https://en.wikipedia.org/wiki/DWARF) в целях профилирования запросов. -:::danger "Предупреждение" - Эти функции выполняются медленно и могут приводить к нежелательным последствиям в плане безопасности. +:::danger Предупреждение +Эти функции выполняются медленно и могут приводить к нежелательным последствиям в плане безопасности. ::: Для правильной работы функций интроспекции: diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 53ab638165a..123f40ce05d 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -361,8 +361,8 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); ``` -:::note "Примечание" - до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_EXISTS(path, json) +:::note Примечание +До версии 21.11 порядок аргументов функции был обратный, т.е. JSON_EXISTS(path, json) ::: ## JSON_QUERY(json, path) {#json-query} @@ -388,8 +388,8 @@ SELECT toTypeName(JSON_QUERY('{"hello":2}', '$.hello')); [2] String ``` -:::note "Примечание" - до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_QUERY(path, json) +:::note Примечание +До версии 21.11 порядок аргументов функции был обратный, т.е. JSON_QUERY(path, json) ::: ## JSON_VALUE(json, path) {#json-value} @@ -416,8 +416,8 @@ world String ``` -:::note "Примечание" - до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_VALUE(path, json) +:::note Примечание +До версии 21.11 порядок аргументов функции был обратный, т.е. JSON_VALUE(path, json) ::: ## toJSONString {#tojsonstring} diff --git a/docs/ru/sql-reference/functions/nlp-functions.md b/docs/ru/sql-reference/functions/nlp-functions.md index 5d6540871d5..ed5fbffbb9c 100644 --- a/docs/ru/sql-reference/functions/nlp-functions.md +++ b/docs/ru/sql-reference/functions/nlp-functions.md @@ -6,8 +6,8 @@ sidebar_label: NLP # [экспериментально] Функции для работы с естественным языком {#nlp-functions} -:::danger "Предупреждение" - Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`. +:::danger Предупреждение +Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`. ::: ## stem {#stem} diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index de54f1b3607..78608aaf817 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -792,8 +792,8 @@ neighbor(column, offset[, default_value]) Результат функции зависит от затронутых блоков данных и порядка данных в блоке. 
-:::danger "Предупреждение" - Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных. +:::danger Предупреждение +Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных. ::: Порядок строк, используемый при вычислении функции `neighbor`, может отличаться от порядка строк, возвращаемых пользователю. @@ -902,8 +902,8 @@ FROM numbers(16) Считает разницу между последовательными значениями строк в блоке данных. Возвращает 0 для первой строки и разницу с предыдущей строкой для каждой последующей строки. -:::danger "Предупреждение" - Функция может взять значение предыдущей строки только внутри текущего обработанного блока данных. +:::danger Предупреждение +Функция может взять значение предыдущей строки только внутри текущего обработанного блока данных. ::: Результат функции зависит от затронутых блоков данных и порядка данных в блоке. @@ -983,9 +983,9 @@ WHERE diff != 1 У каждого события есть время начала и время окончания. Считается, что время начала включено в событие, а время окончания исключено из него. Столбцы со временем начала и окончания событий должны иметь одинаковый тип данных. Функция подсчитывает количество событий, происходящих одновременно на момент начала каждого из событий в выборке. -:::danger "Предупреждение" - События должны быть отсортированы по возрастанию времени начала. Если это требование нарушено, то функция вызывает исключение. - Каждый блок данных обрабатывается независимо. Если события из разных блоков данных накладываются по времени, они не могут быть корректно обработаны. +:::danger Предупреждение +События должны быть отсортированы по возрастанию времени начала. Если это требование нарушено, то функция вызывает исключение. +Каждый блок данных обрабатывается независимо. Если события из разных блоков данных накладываются по времени, они не могут быть корректно обработаны. ::: **Синтаксис** @@ -1675,8 +1675,8 @@ FROM numbers(10); Накапливает состояния агрегатной функции для каждой строки блока данных. -:::danger "Warning" - Функция обнуляет состояние для каждого нового блока. +:::danger Предупреждение +Функция обнуляет состояние для каждого нового блока. ::: **Синтаксис** @@ -2133,8 +2133,8 @@ countDigits(x) Тип: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). -:::note "Примечание" - Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow). +:::note Примечание +Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow). 
::: **Пример** diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 6e3830869cd..4f9ae4428a4 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -28,8 +28,8 @@ position(needle IN haystack) Алиас: `locate(haystack, needle[, start_pos])`. -:::note "Примечание" - Синтаксис `position(needle IN haystack)` обеспечивает совместимость с SQL, функция работает так же, как `position(haystack, needle)`. +:::note Примечание +Синтаксис `position(needle IN haystack)` обеспечивает совместимость с SQL, функция работает так же, как `position(haystack, needle)`. ::: **Аргументы** @@ -329,8 +329,8 @@ Result: Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. -:::note "Примечание" - Во всех функциях `multiSearch*` количество needles должно быть меньше 28 из-за особенностей реализации. +:::note Примечание +Во всех функциях `multiSearch*` количество needles должно быть меньше 28 из-за особенностей реализации. ::: ## match(haystack, pattern) {#matchhaystack-pattern} @@ -347,8 +347,8 @@ Result: То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. -:::note "Примечание" - Длина любой строки из `haystack` должна быть меньше 232 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. +:::note Примечание +Длина любой строки из `haystack` должна быть меньше 232 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. ::: ## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} @@ -370,12 +370,12 @@ Result: То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. -:::note "Примечание" - `multiFuzzyMatch*` функции не поддерживают UTF-8 закодированные регулярные выражения, и такие выражения рассматриваются как байтовые из-за ограничения hyperscan. +:::note Примечание +`multiFuzzyMatch*` функции не поддерживают UTF-8 закодированные регулярные выражения, и такие выражения рассматриваются как байтовые из-за ограничения hyperscan. ::: - -:::note "Примечание" - Чтобы выключить все функции, использующие hyperscan, используйте настройку `SET allow_hyperscan = 0;`. + +:::note Примечание +Чтобы выключить все функции, использующие hyperscan, используйте настройку `SET allow_hyperscan = 0;`. ::: ## extract(haystack, pattern) {#extracthaystack-pattern} @@ -389,8 +389,8 @@ Result: Разбирает строку `haystack` на фрагменты, соответствующие группам регулярного выражения `pattern`. Возвращает массив массивов, где первый массив содержит все фрагменты, соответствующие первой группе регулярного выражения, второй массив - соответствующие второй группе, и т.д. -:::note "Замечание" - Функция `extractAllGroupsHorizontal` работает медленнее, чем функция [extractAllGroupsVertical](#extractallgroups-vertical). 
+:::note Замечание +Функция `extractAllGroupsHorizontal` работает медленнее, чем функция [extractAllGroupsVertical](#extractallgroups-vertical). ::: **Синтаксис** @@ -561,8 +561,8 @@ SELECT * FROM Months WHERE ilike(name, '%j%'); Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. -:::note "Примечание" - Для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем 2-х байтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш таблицами – могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` – мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше 1 – это работает для латиницы и почти для всех кириллических букв. +:::note Примечание +Для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем 2-х байтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш таблицами – могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` – мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше 1 – это работает для латиницы и почти для всех кириллических букв. ::: ## countMatches(haystack, pattern) {#countmatcheshaystack-pattern} diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 088b1a9a1f1..5a023d94d0f 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -762,6 +762,44 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; └────────────┴───────┘ ``` +## toDecimalString + +Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом. + +**Синтаксис** + +``` sql +toDecimalString(number, scale) +``` + +**Параметры** + +- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md), +- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md). + * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов), + * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60. + +**Возвращаемое значение** + +- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части. + При необходимости число округляется по стандартным правилам арифметики. 
+ +**Пример использования** + +Запрос: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Результат: + +```response +┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} @@ -894,7 +932,7 @@ x::t - Преобразованное значение. -:::note "Примечание" +:::note Примечание Если входное значение выходит за границы нового типа, то результат переполняется. Например, `CAST(-1, 'UInt8')` возвращает `255`. ::: @@ -1443,8 +1481,8 @@ SELECT toLowCardinality('1'); Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды. Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. -:::info "Примечание" - Возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. +:::info Примечание +Возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. ::: **Синтаксис** diff --git a/docs/ru/sql-reference/operators/exists.md b/docs/ru/sql-reference/operators/exists.md index 38855abbcf3..0e7d0ac7574 100644 --- a/docs/ru/sql-reference/operators/exists.md +++ b/docs/ru/sql-reference/operators/exists.md @@ -7,8 +7,8 @@ slug: /ru/sql-reference/operators/exists `EXISTS` может быть использован в секции [WHERE](../../sql-reference/statements/select/where.md). -:::danger "Предупреждение" - Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. +:::danger Предупреждение +Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. ::: **Синтаксис** diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md index 60400fb2b31..6b44446b3f7 100644 --- a/docs/ru/sql-reference/operators/in.md +++ b/docs/ru/sql-reference/operators/in.md @@ -122,7 +122,7 @@ FROM t_null Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. -:::note "Внимание" +:::note Внимание Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. ::: При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index b5fec3cb38c..3238c3de4aa 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -212,8 +212,9 @@ FROM test.Orders; В качестве значения оператора `INTERVAL` вы можете также использовать строковый литерал. Например, выражение `INTERVAL 1 HOUR` идентично выражению `INTERVAL '1 hour'` или `INTERVAL '1' hour`. -:::danger "Внимание" - Интервалы различных типов нельзя объединять. Нельзя использовать выражения вида `INTERVAL 4 DAY 1 HOUR`. Вместо этого интервалы можно выразить в единицах меньших или равных наименьшей единице интервала, Например, `INTERVAL 25 HOUR`. Также можно выполнять последовательные операции как показано в примере ниже. +:::danger Внимание +Интервалы различных типов нельзя объединять. Нельзя использовать выражения вида `INTERVAL 4 DAY 1 HOUR`. Вместо этого интервалы можно выразить в единицах меньших или равных наименьшей единице интервала, Например, `INTERVAL 25 HOUR`. 
Также можно выполнять последовательные операции как показано в примере ниже. +::: Примеры: @@ -249,9 +250,10 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV Вы можете изменить дату, не используя синтаксис `INTERVAL`, а просто добавив или отняв секунды, минуты и часы. Например, чтобы передвинуть дату на один день вперед, можно прибавить к ней значение `60*60*24`. - :::note "Примечание" - Синтаксис `INTERVAL` или функция `addDays` предпочтительнее для работы с датами. Сложение с числом (например, синтаксис `now() + ...`) не учитывает региональные настройки времени, например, переход на летнее время. - ::: +:::note Примечание +Синтаксис `INTERVAL` или функция `addDays` предпочтительнее для работы с датами. Сложение с числом (например, синтаксис `now() + ...`) не учитывает региональные настройки времени, например, переход на летнее время. +::: + Пример: ``` sql @@ -263,7 +265,6 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 │ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │ └─────────────────────┴─────────────────────┴─────────────────────┘ ``` -::: **Смотрите также** @@ -303,9 +304,9 @@ END В случае указания `x` - функция `transform(x, [a, ...], [b, ...], c)`. Иначе — `multiIf(a, b, ..., c)`. При отсутствии секции `ELSE c`, значением по умолчанию будет `NULL`. - :::note "Примечание" - Функция `transform` не умеет работать с `NULL`. - ::: +:::note Примечание +Функция `transform` не умеет работать с `NULL`. +::: ## Оператор склеивания строк {#operator-skleivaniia-strok} `s1 || s2` - функция `concat(s1, s2)` diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index a8ace213075..1a45f0f1f7f 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -76,8 +76,8 @@ DROP COLUMN [IF EXISTS] name Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно. -:::warning "Предупреждение" - Вы не можете удалить столбец, используемый в [материализованном представлениии](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка. +:::warning Предупреждение +Вы не можете удалить столбец, используемый в [материализованном представлениии](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка. ::: Пример: @@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Синтаксис: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Пример** diff --git a/docs/ru/sql-reference/statements/alter/delete.md b/docs/ru/sql-reference/statements/alter/delete.md index 1d16bbdc768..dc968a17349 100644 --- a/docs/ru/sql-reference/statements/alter/delete.md +++ b/docs/ru/sql-reference/statements/alter/delete.md @@ -12,9 +12,10 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr Удаляет данные, соответствующие указанному выражению фильтрации. Реализовано как [мутация](../../../sql-reference/statements/alter/index.md#mutations). - :::note - Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. 
- ::: +:::note Примечание +Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. +::: + Выражение `filter_expr` должно иметь тип `UInt8`. Запрос удаляет строки в таблице, для которых это выражение принимает ненулевое значение. Один запрос может содержать несколько команд, разделенных запятыми. diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 33c2c1de417..07f5ff0a298 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -26,9 +26,9 @@ ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN - [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md) - [TTL](../../../sql-reference/statements/alter/ttl.md) - :::note - Запрос `ALTER TABLE` поддерживается только для таблиц типа `*MergeTree`, а также `Merge` и `Distributed`. Запрос имеет несколько вариантов. - ::: +:::note Примечание +Запрос `ALTER TABLE` поддерживается только для таблиц типа `*MergeTree`, а также `Merge` и `Distributed`. Запрос имеет несколько вариантов. +::: Следующие запросы `ALTER` управляют представлениями: - [ALTER TABLE ... MODIFY QUERY](../../../sql-reference/statements/alter/view.md) — изменяет структуру [Materialized view](../create/view.md#materialized). @@ -76,8 +76,8 @@ ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name Вы можете указать время ожидания (в секундах) выполнения всех запросов `ALTER` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout). -:::info "Примечание" - Для всех запросов `ALTER` при `alter_sync = 2` и неактивности некоторых реплик больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, генерируется исключение `UNFINISHED`. +:::info Примечание +Для всех запросов `ALTER` при `alter_sync = 2` и неактивности некоторых реплик больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, генерируется исключение `UNFINISHED`. ::: Для запросов `ALTER TABLE ... UPDATE|DELETE` синхронность выполнения определяется настройкой [mutations_sync](../../../operations/settings/settings.md#mutations_sync). diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 90688c9ece2..4d37dc82fce 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -182,9 +182,10 @@ ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [W Создаёт резервную копию для заданной партиции. Если выражение `PARTITION` опущено, резервные копии будут созданы для всех партиций. - :::note "Примечание" - Создание резервной копии не требует остановки сервера. - ::: +:::note Примечание +Создание резервной копии не требует остановки сервера. +::: + Для таблиц старого стиля имя партиций можно задавать в виде префикса (например, `2019`). В этом случае, резервные копии будут созданы для всех соответствующих партиций. Подробнее о том, как корректно задать имя партиции, см. в разделе [Как задавать имя партиции в запросах ALTER](#alter-how-to-specify-part-expr). 
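Например, схематичный пример создания резервной копии (имя таблицы и выражение партиции здесь условные, приведены только для иллюстрации синтаксиса выше):

``` sql
-- Имя таблицы и партиция условные.
ALTER TABLE visits FREEZE PARTITION 201902;
-- Вместо инкрементального номера резервной копии будет использовано имя 'backup_201902'.
ALTER TABLE visits FREEZE PARTITION 201902 WITH NAME 'backup_201902';
```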
Запрос формирует для текущего состояния таблицы жесткие ссылки на данные в этой таблице. Ссылки размещаются в директории `/var/lib/clickhouse/shadow/N/...`, где: @@ -193,9 +194,9 @@ ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [W - `N` — инкрементальный номер резервной копии. - если задан параметр `WITH NAME`, то вместо инкрементального номера используется значение параметра `'backup_name'`. - :::note "Примечание" - При использовании [нескольких дисков для хранения данных таблицы](../../statements/alter/index.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`. - ::: +:::note Примечание +При использовании [нескольких дисков для хранения данных таблицы](../../statements/alter/index.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`. +::: Структура директорий внутри резервной копии такая же, как внутри `/var/lib/clickhouse/`. Запрос выполнит `chmod` для всех файлов, запрещая запись в них. Обратите внимание, запрос `ALTER TABLE t FREEZE PARTITION` не реплицируется. Он создает резервную копию только на локальном сервере. После создания резервной копии данные из `/var/lib/clickhouse/shadow/` можно скопировать на удалённый сервер, а локальную копию удалить. diff --git a/docs/ru/sql-reference/statements/alter/projection.md b/docs/ru/sql-reference/statements/alter/projection.md index 33e52b93add..523bdf371b0 100644 --- a/docs/ru/sql-reference/statements/alter/projection.md +++ b/docs/ru/sql-reference/statements/alter/projection.md @@ -20,6 +20,6 @@ sidebar_label: PROJECTION Также команды реплицируются, синхронизируя описания проекций в метаданных с помощью ZooKeeper. - :::note - Манипуляции с проекциями поддерживаются только для таблиц с движком [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) (включая [replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты). - ::: +:::note Примечание +Манипуляции с проекциями поддерживаются только для таблиц с движком [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) (включая [replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты). +::: diff --git a/docs/ru/sql-reference/statements/alter/sample-by.md b/docs/ru/sql-reference/statements/alter/sample-by.md index ca3cb93d12b..7b5705898e3 100644 --- a/docs/ru/sql-reference/statements/alter/sample-by.md +++ b/docs/ru/sql-reference/statements/alter/sample-by.md @@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression Эта команда является упрощенной в том смысле, что она изменяет только метаданные. Первичный ключ должен содержать новый ключ сэмплирования. - :::note "Note" - Это работает только для таблиц в семействе [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (включая +:::note Примечание +Это работает только для таблиц в семействе [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (включая [реплицируемые](../../../engines/table-engines/mergetree-family/replication.md) таблицы). 
- ::: \ No newline at end of file +::: \ No newline at end of file diff --git a/docs/ru/sql-reference/statements/alter/setting.md b/docs/ru/sql-reference/statements/alter/setting.md index 5eba971fae1..078e73c48e3 100644 --- a/docs/ru/sql-reference/statements/alter/setting.md +++ b/docs/ru/sql-reference/statements/alter/setting.md @@ -15,9 +15,9 @@ sidebar_label: SETTING ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ... ``` - :::note "Примечание" - Эти запросы могут применяться только к таблицам на движке [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). - ::: +:::note Примечание +Эти запросы могут применяться только к таблицам на движке [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). +::: ## MODIFY SETTING {#alter_modify_setting} diff --git a/docs/ru/sql-reference/statements/alter/update.md b/docs/ru/sql-reference/statements/alter/update.md index c9ca9651d62..b2032ac77d1 100644 --- a/docs/ru/sql-reference/statements/alter/update.md +++ b/docs/ru/sql-reference/statements/alter/update.md @@ -12,9 +12,10 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE Манипулирует данными, соответствующими заданному выражению фильтрации. Реализовано как [мутация](../../../sql-reference/statements/alter/index.md#mutations). - :::note - Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. - ::: +:::note Примечание +Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. +::: + Выражение `filter_expr` должно иметь тип `UInt8`. Запрос изменяет значение указанных столбцов на вычисленное значение соответствующих выражений в каждой строке, для которой `filter_expr` принимает ненулевое значение. Вычисленные значения преобразуются к типу столбца с помощью оператора `CAST`. Изменение столбцов, которые используются при вычислении первичного ключа или ключа партиционирования, не поддерживается. Один запрос может содержать несколько команд, разделенных запятыми. diff --git a/docs/ru/sql-reference/statements/create/row-policy.md b/docs/ru/sql-reference/statements/create/row-policy.md index c19e8a8fc9b..ae5bdc6783a 100644 --- a/docs/ru/sql-reference/statements/create/row-policy.md +++ b/docs/ru/sql-reference/statements/create/row-policy.md @@ -28,17 +28,17 @@ CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluste Ключевым словом `ALL` обозначаются все пользователи, включая текущего. Ключевые слова `ALL EXCEPT` позволяют исключить пользователей из списка всех пользователей. Например, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost` - :::note - Если для таблицы не задано ни одной политики доступа к строкам, то любой пользователь может выполнить команду SELECT и получить все строки таблицы. Если определить хотя бы одну политику для таблицы, до доступ к строкам будет управляться этими политиками, причем для всех пользователей (даже для тех, для кого политики не определялись). 
Например, следующая политика +:::note Примечание +Если для таблицы не задано ни одной политики доступа к строкам, то любой пользователь может выполнить команду SELECT и получить все строки таблицы. Если определить хотя бы одну политику для таблицы, до доступ к строкам будет управляться этими политиками, причем для всех пользователей (даже для тех, для кого политики не определялись). Например, следующая политика - `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` +`CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` - запретит пользователям `mira` и `peter` видеть строки с `b != 1`, и еще запретит всем остальным пользователям (например, пользователю `paul`) видеть какие-либо строки вообще из таблицы `mydb.table1`. +запретит пользователям `mira` и `peter` видеть строки с `b != 1`, и еще запретит всем остальным пользователям (например, пользователю `paul`) видеть какие-либо строки вообще из таблицы `mydb.table1`. - Если это нежелательно, такое поведение можно исправить, определив дополнительную политику: +Если это нежелательно, такое поведение можно исправить, определив дополнительную политику: - `CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` - ::: +`CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` +::: ## Секция AS {#create-row-policy-as} diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 64eae49be6c..dbd6a325c40 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -156,8 +156,9 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -:::danger "Предупреждение" - Вы не можете сочетать оба способа в одном запросе. +:::danger Предупреждение +Вы не можете сочетать оба способа в одном запросе. +::: ## Ограничения {#constraints} @@ -209,8 +210,9 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Кодеки можно последовательно комбинировать, например, `CODEC(Delta, Default)`. -:::danger "Предупреждение" - Нельзя распаковать базу данных ClickHouse с помощью сторонних утилит наподобие `lz4`. Необходимо использовать специальную утилиту [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor). +:::danger Предупреждение +Нельзя распаковать базу данных ClickHouse с помощью сторонних утилит наподобие `lz4`. Необходимо использовать специальную утилиту [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor). +::: Сжатие поддерживается для следующих движков таблиц: @@ -240,6 +242,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. 
Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе «Compressing Time Stamps» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +- `GCD` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. - `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. @@ -265,12 +268,12 @@ ENGINE = MergeTree() Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит). - :::note "Внимание" - Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст. - ::: - :::note "Внимание" - Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование. - ::: +:::note Внимание +Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст. +::: +:::note Внимание +Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование. +::: **Пример** ```sql @@ -281,9 +284,10 @@ CREATE TABLE mytable ENGINE = MergeTree ORDER BY x; ``` - :::note "Замечание" - Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных. 
- ::: +:::note Примечание +Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных. +::: + **Пример** ```sql @@ -324,9 +328,10 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name Запрос `REPLACE` позволяет частично изменить таблицу (структуру или данные). - :::note "Замечание" - Такие запросы поддерживаются только движком БД [Atomic](../../../engines/database-engines/atomic.md). - ::: +:::note Примечание +Такие запросы поддерживаются только движком БД [Atomic](../../../engines/database-engines/atomic.md). +::: + Чтобы удалить часть данных из таблицы, вы можете создать новую таблицу, добавить в нее данные из старой таблицы, которые вы хотите оставить (отобрав их с помощью запроса `SELECT`), затем удалить старую таблицу и переименовать новую таблицу так как старую: ```sql @@ -399,9 +404,9 @@ SELECT * FROM base.t1; Вы можете добавить комментарий к таблице при ее создании. - :::note "Замечание" - Комментарий поддерживается для всех движков таблиц, кроме [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) и [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). - ::: +:::note Примечание +Комментарий поддерживается для всех движков таблиц, кроме [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) и [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). +::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index c0c50174d78..9c1f25bf6d2 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -55,8 +55,8 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] - `CREATE USER mira@'localhost'` — Эквивалентно `HOST LOCAL`. - `CREATE USER mira@'192.168.%.%'` — Эквивалентно `HOST LIKE`. -:::info "Внимание" - ClickHouse трактует конструкцию `user_name@'address'` как имя пользователя целиком. То есть технически вы можете создать несколько пользователей с одинаковыми `user_name`, но разными частями конструкции после `@`, но лучше так не делать. +:::info Внимание +ClickHouse трактует конструкцию `user_name@'address'` как имя пользователя целиком. То есть технически вы можете создать несколько пользователей с одинаковыми `user_name`, но разными частями конструкции после `@`, но лучше так не делать. ::: ## Секция GRANTEES {#grantees} diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 1a60dc0716c..543a4b21ad1 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -48,12 +48,12 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Материализованное представление устроено следующим образом: при вставке данных в таблицу, указанную в SELECT-е, кусок вставляемых данных преобразуется этим запросом SELECT, и полученный результат вставляется в представление. - :::note "Важно" +:::note Важно +Материализованные представления в ClickHouse используют **имена столбцов** вместо порядка следования столбцов при вставке в целевую таблицу. Если в результатах запроса `SELECT` некоторые имена столбцов отсутствуют, то ClickHouse использует значение по умолчанию, даже если столбец не является [Nullable](../../data-types/nullable.md). 
Безопасной практикой при использовании материализованных представлений считается добавление псевдонимов для каждого столбца. - Материализованные представления в ClickHouse используют **имена столбцов** вместо порядка следования столбцов при вставке в целевую таблицу. Если в результатах запроса `SELECT` некоторые имена столбцов отсутствуют, то ClickHouse использует значение по умолчанию, даже если столбец не является [Nullable](../../data-types/nullable.md). Безопасной практикой при использовании материализованных представлений считается добавление псевдонимов для каждого столбца. +Материализованные представления в ClickHouse больше похожи на `after insert` триггеры. Если в запросе материализованного представления есть агрегирование, оно применяется только к вставляемому блоку записей. Любые изменения существующих данных исходной таблицы (например обновление, удаление, удаление раздела и т.д.) не изменяют материализованное представление. +::: - Материализованные представления в ClickHouse больше похожи на `after insert` триггеры. Если в запросе материализованного представления есть агрегирование, оно применяется только к вставляемому блоку записей. Любые изменения существующих данных исходной таблицы (например обновление, удаление, удаление раздела и т.д.) не изменяют материализованное представление. - ::: Если указано `POPULATE`, то при создании представления в него будут добавлены данные, уже содержащиеся в исходной таблице, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Если `POPULATE` не указано, представление будет содержать только данные, добавленные в таблицу после создания представления. Использовать `POPULATE` не рекомендуется, так как в представление не попадут данные, добавляемые в таблицу во время создания представления. Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. @@ -68,10 +68,10 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na ## LIVE-представления [экспериментальный функционал] {#live-view} - :::note "Важно" - Представления `LIVE VIEW` являются экспериментальной возможностью. Их использование может повлечь потерю совместимости в будущих версиях. - Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). - ::: +:::note Важно +Представления `LIVE VIEW` являются экспериментальной возможностью. Их использование может повлечь потерю совместимости в будущих версиях. +Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). +::: ```sql CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` @@ -81,14 +81,14 @@ CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] A LIVE-представления работают по тому же принципу, что и распределенные таблицы. Но вместо объединения отдельных частей данных с разных серверов, LIVE-представления объединяют уже имеющийся результат с новыми данными. 
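Схематичный пример создания и просмотра LIVE-представления по синтаксису выше (имя представления условное; предварительно нужно включить настройку `allow_experimental_live_view`):

``` sql
SET allow_experimental_live_view = 1;

-- Имя представления условное; WITH REFRESH задаёт период принудительного обновления в секундах.
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();

-- Отслеживание изменений результата.
WATCH lv;
```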
Если в исходном запросе LIVE-представления есть вложенный подзапрос, его результаты не кешируются, в кеше хранится только результат основного запроса. -:::info "Ограничения" - - [Табличные функции](../../../sql-reference/table-functions/index.md) в основном запросе не поддерживаются. - - Таблицы, не поддерживающие изменение с помощью запроса `INSERT`, такие как [словари](../../../sql-reference/dictionaries/index.md) и [системные таблицы](../../../operations/system-tables/index.md), а также [нормальные представления](#normal) или [материализованные представления](#materialized), не запускают обновление LIVE-представления. - - В LIVE-представлениях могут использоваться только такие запросы, которые объединяют результаты по старым и новым данным. LIVE-представления не работают с запросами, требующими полного пересчета данных или агрегирования с сохранением состояния. - - `LIVE VIEW` не работает для реплицируемых и распределенных таблиц, добавление данных в которые происходит на разных узлах. - - `LIVE VIEW` не обновляется, если в исходном запросе используются несколько таблиц. +:::info Ограничения +- [Табличные функции](../../../sql-reference/table-functions/index.md) в основном запросе не поддерживаются. +- Таблицы, не поддерживающие изменение с помощью запроса `INSERT`, такие как [словари](../../../sql-reference/dictionaries/index.md) и [системные таблицы](../../../operations/system-tables/index.md), а также [нормальные представления](#normal) или [материализованные представления](#materialized), не запускают обновление LIVE-представления. +- В LIVE-представлениях могут использоваться только такие запросы, которые объединяют результаты по старым и новым данным. LIVE-представления не работают с запросами, требующими полного пересчета данных или агрегирования с сохранением состояния. +- `LIVE VIEW` не работает для реплицируемых и распределенных таблиц, добавление данных в которые происходит на разных узлах. +- `LIVE VIEW` не обновляется, если в исходном запросе используются несколько таблиц. - В случаях, когда `LIVE VIEW` не обновляется автоматически, чтобы обновлять его принудительно с заданной периодичностью, используйте [WITH REFRESH](#live-view-with-refresh). +В случаях, когда `LIVE VIEW` не обновляется автоматически, чтобы обновлять его принудительно с заданной периодичностью, используйте [WITH REFRESH](#live-view-with-refresh). ::: ### Отслеживание изменений LIVE-представлений {#live-view-monitoring} diff --git a/docs/ru/sql-reference/statements/exchange.md b/docs/ru/sql-reference/statements/exchange.md index 2c872791afd..fdc28b88c45 100644 --- a/docs/ru/sql-reference/statements/exchange.md +++ b/docs/ru/sql-reference/statements/exchange.md @@ -9,9 +9,9 @@ sidebar_label: EXCHANGE Атомарно обменивает имена двух таблиц или словарей. Это действие также можно выполнить с помощью запроса [RENAME](./rename.md), используя третье временное имя, но в таком случае действие неатомарно. - :::note "Примечание" - Запрос `EXCHANGE` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). - ::: +:::note Примечание +Запрос `EXCHANGE` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). 
+::: **Синтаксис** ```sql diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md index 0179c840df6..4e0a13f7eae 100644 --- a/docs/ru/sql-reference/statements/explain.md +++ b/docs/ru/sql-reference/statements/explain.md @@ -134,9 +134,9 @@ Union ReadFromStorage (SystemNumbers) ``` - :::note "Примечание" - Оценка стоимости выполнения шага и запроса не поддерживается. - ::: +:::note Примечание +Оценка стоимости выполнения шага и запроса не поддерживается. +::: При `json = 1` шаги выполнения запроса выводятся в формате JSON. Каждый узел — это словарь, в котором всегда есть ключи `Node Type` и `Plans`. `Node Type` — это строка с именем шага. `Plans` — это массив с описаниями дочерних шагов. Другие дополнительные ключи могут быть добавлены в зависимости от типа узла и настроек. Пример: diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 4fa6ac4ce66..747e36b8809 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ sidebar_label: INSERT INTO **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -100,7 +100,7 @@ INSERT INTO t FORMAT TabSeparated **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Соответствие столбцов определяется их позицией в секции SELECT. При этом, их имена в выражении SELECT и в таблице для INSERT, могут отличаться. При необходимости выполняется приведение типов данных, эквивалентное соответствующему оператору CAST. @@ -120,7 +120,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`. diff --git a/docs/ru/sql-reference/statements/optimize.md b/docs/ru/sql-reference/statements/optimize.md index 26993183232..abca8ab73d1 100644 --- a/docs/ru/sql-reference/statements/optimize.md +++ b/docs/ru/sql-reference/statements/optimize.md @@ -8,8 +8,8 @@ sidebar_label: OPTIMIZE Запрос пытается запустить внеплановое слияние кусков данных для таблиц. -:::danger "Внимание" - `OPTIMIZE` не устраняет причину появления ошибки `Too many parts`. +:::danger Внимание +`OPTIMIZE` не устраняет причину появления ошибки `Too many parts`. ::: **Синтаксис** @@ -30,8 +30,8 @@ ClickHouse не оповещает клиента. 
Чтобы включить Вы можете указать время ожидания (в секундах) выполнения запросов `OPTIMIZE` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout). -:::info "Примечание" - Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. +:::info Примечание +Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. ::: ## Выражение BY {#by-expression} @@ -40,10 +40,10 @@ ClickHouse не оповещает клиента. Чтобы включить Список столбцов для дедупликации должен включать все столбцы, указанные в условиях сортировки (первичный ключ и ключ сортировки), а также в условиях партиционирования (ключ партиционирования). - :::note "Примечание" - Обратите внимание, что символ подстановки `*` обрабатывается так же, как и в запросах `SELECT`: столбцы [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) и [ALIAS](../../sql-reference/statements/create/table.md#alias) не включаются в результат. - Если указать пустой список или выражение, которое возвращает пустой список, то сервер вернет ошибку. Запрос вида `DEDUPLICATE BY aliased_value` также вернет ошибку. - ::: +:::note Примечание +Обратите внимание, что символ подстановки `*` обрабатывается так же, как и в запросах `SELECT`: столбцы [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) и [ALIAS](../../sql-reference/statements/create/table.md#alias) не включаются в результат. +Если указать пустой список или выражение, которое возвращает пустой список, то сервер вернет ошибку. Запрос вида `DEDUPLICATE BY aliased_value` также вернет ошибку. +::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/statements/rename.md b/docs/ru/sql-reference/statements/rename.md index 6575dae9642..797a0d47f35 100644 --- a/docs/ru/sql-reference/statements/rename.md +++ b/docs/ru/sql-reference/statements/rename.md @@ -9,9 +9,9 @@ sidebar_label: RENAME Переименовывает базы данных, таблицы или словари. Несколько сущностей могут быть переименованы в одном запросе. Обратите внимание, что запрос `RENAME` с несколькими сущностями это неатомарная операция. Чтобы обменять имена атомарно, используйте выражение [EXCHANGE](./exchange.md). - :::note "Примечание" - Запрос `RENAME` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). - ::: +:::note Примечание +Запрос `RENAME` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). +::: **Синтаксис** ```sql diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index ea4f357d33c..f6b0669156c 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -13,9 +13,9 @@ sidebar_label: GROUP BY Если вы хотите для группировки данных в таблице указывать номера столбцов, а не названия, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). - :::note "Примечание" - Есть ещё один способ запустить агрегацию по таблице. 
Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку. - ::: +:::note Примечание +Есть ещё один способ запустить агрегацию по таблице. Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку. +::: ## Обработка NULL {#null-processing} При агрегации ClickHouse интерпретирует [NULL](../../syntax.md#null-literal) как обычное значение, то есть `NULL==NULL`. Это отличается от обработки `NULL` в большинстве других контекстов. @@ -52,9 +52,9 @@ sidebar_label: GROUP BY Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка. - :::note "Примечание" - Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. - ::: +:::note Примечание +Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. +::: **Пример** Рассмотрим таблицу t: @@ -112,9 +112,9 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка. - :::note "Примечание" - Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. - ::: +:::note Примечание +Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. +::: **Пример** Рассмотрим таблицу t: @@ -204,9 +204,9 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; - В `Pretty*` форматах, строка выводится в виде отдельной таблицы после основного результата. - В других форматах она не доступна. - :::note "Примечание" - totals выводится только в результатах запросов `SELECT`, и не вывоводится в `INSERT INTO ... SELECT`. - ::: +:::note Примечание +totals выводится только в результатах запросов `SELECT`, и не выводится в `INSERT INTO ... SELECT`. +::: При использовании секции [HAVING](having.md) поведение `WITH TOTALS` контролируется настройкой `totals_mode`. ### Настройка обработки итогов {#configuring-totals-processing} diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 6be438f8c43..612cf276009 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -37,9 +37,9 @@ FROM - `LEFT ANY JOIN`, `RIGHT ANY JOIN` и `INNER ANY JOIN`, Частично (для противоположных сторон `LEFT` и `RIGHT`) или полностью (для `INNER` и `FULL`) отключает декартово произведение для стандартных видов `JOIN`. - `ASOF JOIN` и `LEFT ASOF JOIN`, Для соединения последовательностей по нечеткому совпадению. Использование `ASOF JOIN` описано ниже. - :::note "Примечание" - Если настройка [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) установлена в значение `partial_merge`, то для `RIGHT JOIN` и `FULL JOIN` поддерживается только уровень строгости `ALL` (`SEMI`, `ANTI`, `ANY` и `ASOF` не поддерживаются).
- ::: +:::note Примечание +Если настройка [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) установлена в значение `partial_merge`, то для `RIGHT JOIN` и `FULL JOIN` поддерживается только уровень строгости `ALL` (`SEMI`, `ANTI`, `ANY` и `ASOF` не поддерживаются). +::: ## Настройки {#join-settings} Значение строгости по умолчанию может быть переопределено с помощью настройки [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). @@ -64,9 +64,9 @@ FROM Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый аргумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`. - :::note "Примечание" - Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`). - ::: +:::note Примечание +Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`). +::: **Пример** Рассмотрим `table_1` и `table_2`: @@ -199,9 +199,9 @@ USING (equi_column1, ... equi_columnN, asof_column) `ASOF JOIN` принимает метку времени пользовательского события из `table_1` и находит такое событие в `table_2` метка времени которого наиболее близка к метке времени события из `table_1` в соответствии с условием на ближайшее совпадение. При этом столбец `user_id` используется для объединения по равенству, а столбец `ev_time` для объединения по ближайшему совпадению. В нашем примере `event_1_1` может быть объединено с `event_2_1`, `event_1_2` может быть объединено с `event_2_3`, а `event_2_2` не объединяется. - :::note "Примечание" - `ASOF JOIN` не поддержан для движка таблиц [Join](../../../engines/table-engines/special/join.md). - ::: +:::note Примечание +`ASOF JOIN` не поддержан для движка таблиц [Join](../../../engines/table-engines/special/join.md). +::: Чтобы задать значение строгости по умолчанию, используйте сессионный параметр [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). ## Распределённый JOIN {#global-join} diff --git a/docs/ru/sql-reference/statements/select/limit.md b/docs/ru/sql-reference/statements/select/limit.md index 73daa76fafa..6ca26075f1a 100644 --- a/docs/ru/sql-reference/statements/select/limit.md +++ b/docs/ru/sql-reference/statements/select/limit.md @@ -13,9 +13,9 @@ sidebar_label: LIMIT При отсутствии секции [ORDER BY](order-by.md), однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным. - :::note "Примечание" - Количество возвращаемых строк может зависеть также от настройки [limit](../../../operations/settings/settings.md#limit). - ::: +:::note Примечание +Количество возвращаемых строк может зависеть также от настройки [limit](../../../operations/settings/settings.md#limit). +::: ## Модификатор LIMIT ... WITH TIES {#limit-with-ties} Когда вы установите модификатор WITH TIES для `LIMIT n[,m]` и указываете `ORDER BY expr_list`, вы получите первые `n` или `n,m` строк и дополнительно все строки с теми же самым значениями полей указанных в `ORDER BY` равными строке на позиции `n` для `LIMIT n` или `m` для `LIMIT n,m`. 
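A minimal sketch of the modifier, assuming only the built-in `numbers` table function to produce duplicate boundary values:

```sql
-- Sorted values are 0,0,0,0,1,1,1,2,2,2. Plain LIMIT 5 would cut after the first 1;
-- WITH TIES also returns the remaining rows equal to that boundary value, so 7 rows come back.
SELECT number % 3 AS n
FROM numbers(10)
ORDER BY n
LIMIT 5 WITH TIES;
```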
diff --git a/docs/ru/sql-reference/statements/select/offset.md b/docs/ru/sql-reference/statements/select/offset.md index fac995b9a8e..908455a8cd1 100644 --- a/docs/ru/sql-reference/statements/select/offset.md +++ b/docs/ru/sql-reference/statements/select/offset.md @@ -31,12 +31,12 @@ SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1; Опция `WITH TIES` используется для возврата дополнительных строк, которые привязываются к последней в результате запроса. Например, если `fetch_row_count` имеет значение 5 и существуют еще 2 строки с такими же значениями столбцов, указанных в `ORDER BY`, что и у пятой строки результата, то финальный набор будет содержать 7 строк. - :::note "Примечание" - Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют. - ::: - :::note "Примечание" - Общее количество пропущенных строк может зависеть также от настройки [offset](../../../operations/settings/settings.md#offset). - ::: +:::note Примечание +Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют. +::: +:::note Примечание +Общее количество пропущенных строк может зависеть также от настройки [offset](../../../operations/settings/settings.md#offset). +::: ## Примеры {#examples} Входная таблица: diff --git a/docs/ru/sql-reference/statements/select/prewhere.md b/docs/ru/sql-reference/statements/select/prewhere.md index 092370d4b3a..d2595cf22a3 100644 --- a/docs/ru/sql-reference/statements/select/prewhere.md +++ b/docs/ru/sql-reference/statements/select/prewhere.md @@ -19,9 +19,9 @@ Prewhere — это оптимизация для более эффективн Если в запросе есть модификатор [FINAL](from.md#select-from-final), оптимизация `PREWHERE` не всегда корректна. Она действует только если включены обе настройки [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) и [optimize_move_to_prewhere_if_final](../../../operations/settings/settings.md#optimize_move_to_prewhere_if_final). - :::note "Внимание" - Секция `PREWHERE` выполняется до `FINAL`, поэтому результаты запросов `FROM ... FINAL` могут исказиться при использовании `PREWHERE` с полями, не входящями в `ORDER BY` таблицы. - ::: +:::note Внимание +Секция `PREWHERE` выполняется до `FINAL`, поэтому результаты запросов `FROM ... FINAL` могут исказиться при использовании `PREWHERE` с полями, не входящими в `ORDER BY` таблицы. +::: ## Ограничения {#limitations} `PREWHERE` поддерживается только табличными движками из семейства [*MergeTree](../../../engines/table-engines/mergetree-family/index.md). diff --git a/docs/ru/sql-reference/statements/select/sample.md b/docs/ru/sql-reference/statements/select/sample.md index decef52d06f..4edc91c34e4 100644 --- a/docs/ru/sql-reference/statements/select/sample.md +++ b/docs/ru/sql-reference/statements/select/sample.md @@ -13,9 +13,9 @@ sidebar_label: SAMPLE 2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть <100 мс. При этом точность расчета имеет более низкий приоритет. 3. Точность результата участвует в бизнес-модели сервиса. Например, пользователи с бесплатной подпиской на сервис могут получать отчеты с меньшей точностью, чем пользователи с премиум подпиской. - :::note "Внимание" - Не стоит использовать сэмплирование в тех задачах, где важна точность расчетов. Например, при работе с финансовыми отчетами.
+::: Свойства сэмплирования: - Сэмплирование работает детерминированно. При многократном выполнении одного и того же запроса `SELECT .. SAMPLE`, результат всегда будет одинаковым. @@ -60,9 +60,9 @@ ORDER BY PageViews DESC LIMIT 1000 Если задано выражение `SAMPLE n`, запрос будет выполнен для выборки из не менее `n` строк (но не значительно больше этого значения). Например, если задать `SAMPLE 10000000`, в выборку попадут не менее 10,000,000 строк. - :::note "Примечание" - Следует иметь в виду, что `n` должно быть достаточно большим числом. Так как минимальной единицей данных для чтения является одна гранула (её размер задаётся настройкой `index_granularity` для таблицы), имеет смысл создавать выборки, размер которых существенно превосходит размер гранулы. - ::: +:::note Примечание +Следует иметь в виду, что `n` должно быть достаточно большим числом. Так как минимальной единицей данных для чтения является одна гранула (её размер задаётся настройкой `index_granularity` для таблицы), имеет смысл создавать выборки, размер которых существенно превосходит размер гранулы. +::: При выполнении `SAMPLE n` коэффициент сэмплирования заранее неизвестен (то есть нет информации о том, относительно какого количества данных будет сформирована выборка). Чтобы узнать коэффициент сэмплирования, используйте столбец `_sample_factor`. Виртуальный столбец `_sample_factor` автоматически создается в тех таблицах, для которых задано выражение `SAMPLE BY` (подробнее см. в разделе [Создание таблицы MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). В столбце содержится коэффициент сэмплирования для таблицы – он рассчитывается динамически по мере добавления данных в таблицу. Ниже приведены примеры использования столбца `_sample_factor`. diff --git a/docs/ru/sql-reference/statements/select/where.md b/docs/ru/sql-reference/statements/select/where.md index f63ea121d4a..10057cdeb84 100644 --- a/docs/ru/sql-reference/statements/select/where.md +++ b/docs/ru/sql-reference/statements/select/where.md @@ -11,9 +11,9 @@ sidebar_label: WHERE ClickHouse использует в выражении индексы, если это позволяет [движок таблицы](../../../engines/table-engines/index.md). - :::note "Примечание" - Существует оптимизация фильтрации под названием [PREWHERE](prewhere.md). - ::: +:::note Примечание +Существует оптимизация фильтрации под названием [PREWHERE](prewhere.md). +::: Если в секции необходимо проверить [NULL](../../../sql-reference/syntax.md#null-literal), то используйте операторы [IS NULL](../../operators/index.md#operator-is-null) и [IS NOT NULL](../../operators/index.md#is-not-null), а также соответствующие функции [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) и [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull). В противном случае выражение будет считаться всегда не выполненным. **Пример** diff --git a/docs/ru/sql-reference/statements/show.md b/docs/ru/sql-reference/statements/show.md index 59f33c691ae..b3694002cb5 100644 --- a/docs/ru/sql-reference/statements/show.md +++ b/docs/ru/sql-reference/statements/show.md @@ -367,8 +367,8 @@ SHOW ACCESS Возвращает список кластеров. Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md). -:::info "Note" - По запросу `SHOW CLUSTER name` вы получите содержимое таблицы system.clusters для этого кластера. +:::info Примечание +По запросу `SHOW CLUSTER name` вы получите содержимое таблицы system.clusters для этого кластера. 
::: ### Синтаксис {#show-cluster-syntax} diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index ec30a031643..bcaf70e0824 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -166,9 +166,9 @@ ClickHouse может управлять фоновыми процессами SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME | [db.]merge_tree_family_table_name] ``` - :::note - `DETACH / ATTACH` таблицы восстанавливает фоновые мержи для этой таблицы (даже в случае отключения фоновых мержей для всех таблиц семейства MergeTree до `DETACH`). - ::: +:::note Примечание +`DETACH / ATTACH` таблицы восстанавливает фоновые мержи для этой таблицы (даже в случае отключения фоновых мержей для всех таблиц семейства MergeTree до `DETACH`). +::: ### START MERGES {#query_language-system-start-merges} Включает фоновые мержи для таблиц семейства MergeTree: @@ -313,8 +313,9 @@ SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_fami К реплике прикрепляются локально найденные куски, информация о них отправляется в Zookeeper. Если присутствующие в реплике до потери метаданных данные не устарели, они не скачиваются повторно с других реплик. Поэтому восстановление реплики не означает повторную загрузку всех данных по сети. -:::danger "Предупреждение" - Потерянные данные в любых состояниях перемещаются в папку `detached/`. Куски, активные до потери данных (находившиеся в состоянии Committed), прикрепляются. +:::danger Предупреждение +Потерянные данные в любых состояниях перемещаются в папку `detached/`. Куски, активные до потери данных (находившиеся в состоянии Committed), прикрепляются. +::: **Синтаксис** diff --git a/docs/ru/sql-reference/statements/truncate.md b/docs/ru/sql-reference/statements/truncate.md index cd918d19814..9f69dd41cfd 100644 --- a/docs/ru/sql-reference/statements/truncate.md +++ b/docs/ru/sql-reference/statements/truncate.md @@ -18,6 +18,6 @@ TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] Вы можете указать время ожидания (в секундах) выполнения запросов `TRUNCATE` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout). -:::info "Примечание" - Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. +:::info Примечание +Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. ::: diff --git a/docs/ru/sql-reference/statements/watch.md b/docs/ru/sql-reference/statements/watch.md index 3a4bfb7dd00..2d7a318631a 100644 --- a/docs/ru/sql-reference/statements/watch.md +++ b/docs/ru/sql-reference/statements/watch.md @@ -6,10 +6,10 @@ sidebar_label: WATCH # Запрос WATCH {#watch} - :::note "Важно" - Это экспериментальная функция. Она может повлечь потерю совместимости в будущих версиях. - Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку `set allow_experimental_live_view = 1`. - ::: +:::note Важно +Это экспериментальная функция. Она может повлечь потерю совместимости в будущих версиях. +Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку `set allow_experimental_live_view = 1`. 
+::: **Синтаксис** ``` sql @@ -103,6 +103,6 @@ WATCH lv EVENTS LIMIT 1; Параметр `FORMAT` работает аналогично одноименному параметру запроса [SELECT](../../sql-reference/statements/select/format.md#format-clause). -:::info "Примечание" - При отслеживании [LIVE VIEW](./create/view.md#live-view) через интерфейс HTTP следует использовать формат [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress). Постоянные сообщения об изменениях будут добавлены в поток вывода для поддержания активности долговременного HTTP-соединения до тех пор, пока результат запроса изменяется. Проомежуток времени между сообщениями об изменениях управляется настройкой[live_view_heartbeat_interval](./create/view.md#live-view-settings). +:::info Примечание +При отслеживании [LIVE VIEW](./create/view.md#live-view) через интерфейс HTTP следует использовать формат [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress). Постоянные сообщения об изменениях будут добавлены в поток вывода для поддержания активности долговременного HTTP-соединения до тех пор, пока результат запроса изменяется. Промежуток времени между сообщениями об изменениях управляется настройкой [live_view_heartbeat_interval](./create/view.md#live-view-settings). ::: \ No newline at end of file diff --git a/docs/ru/sql-reference/table-functions/cluster.md b/docs/ru/sql-reference/table-functions/cluster.md index a831c280ec4..f148a21294a 100644 --- a/docs/ru/sql-reference/table-functions/cluster.md +++ b/docs/ru/sql-reference/table-functions/cluster.md @@ -10,9 +10,9 @@ sidebar_label: cluster Функция `clusterAllReplicas` работает также как `cluster`, но каждая реплика в кластере используется как отдельный шард/отдельное соединение. - :::note "Примечание" - Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md). - ::: +:::note Примечание +Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md). +::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 83ef115aacd..12a04957f84 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -107,8 +107,9 @@ SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name St SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -:::danger "Предупреждение" - Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +:::danger Предупреждение +Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +::: **Пример** diff --git a/docs/ru/sql-reference/table-functions/hdfs.md b/docs/ru/sql-reference/table-functions/hdfs.md index b0d182eef14..bc4d0867172 100644 --- a/docs/ru/sql-reference/table-functions/hdfs.md +++ b/docs/ru/sql-reference/table-functions/hdfs.md @@ -48,8 +48,9 @@ LIMIT 2 Конструкция с `{}` аналогична табличной функции [remote](remote.md). -:::danger "Warning" - Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`.
+:::danger Предупреждение +Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +::: Шаблоны могут содержаться в разных частях пути. Обрабатываться будут ровно те файлы, которые и удовлетворяют всему шаблону пути, и существуют в файловой системе. diff --git a/docs/ru/sql-reference/table-functions/index.md b/docs/ru/sql-reference/table-functions/index.md index 949cd7dce98..3de57abbca7 100644 --- a/docs/ru/sql-reference/table-functions/index.md +++ b/docs/ru/sql-reference/table-functions/index.md @@ -20,8 +20,9 @@ sidebar_position: 34 - Запросе [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function). -:::danger "Предупреждение" - Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно. +:::danger Предупреждение +Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно. +::: | Функция | Описание | |-----------------------|---------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/ru/sql-reference/table-functions/mysql.md b/docs/ru/sql-reference/table-functions/mysql.md index 9c50cfcb307..5cd514def4f 100644 --- a/docs/ru/sql-reference/table-functions/mysql.md +++ b/docs/ru/sql-reference/table-functions/mysql.md @@ -55,9 +55,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', Объект таблицы с теми же столбцами, что и в исходной таблице MySQL. - :::note "Примечание" - Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. - ::: +:::note Примечание +Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. +::: **Примеры** Таблица в MySQL: diff --git a/docs/ru/sql-reference/table-functions/postgresql.md b/docs/ru/sql-reference/table-functions/postgresql.md index 0b1f437b98f..4f705ea821c 100644 --- a/docs/ru/sql-reference/table-functions/postgresql.md +++ b/docs/ru/sql-reference/table-functions/postgresql.md @@ -27,8 +27,8 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) Таблица с теми же столбцами, что и в исходной таблице PostgreSQL. -:::info "Примечание" - В запросах `INSERT` для того чтобы отличить табличную функцию `postgresql(...)` от таблицы со списком имен столбцов вы должны указывать ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. +:::info Примечание +В запросах `INSERT` для того чтобы отличить табличную функцию `postgresql(...)` от таблицы со списком имен столбцов вы должны указывать ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. ::: ## Особенности реализации {#implementation-details} @@ -43,8 +43,8 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) PostgreSQL массивы конвертируются в массивы ClickHouse. -:::info "Примечание" - Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. 
Внутри ClickHouse допустипы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. +:::info Примечание +Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. ::: Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например: diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 9e456ddd3d9..7deef68f47f 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -104,8 +104,9 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` -:::danger "Warning" - Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +:::danger Предупреждение +Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +::: Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index 1c12913fabe..b8f34d805ff 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -40,8 +40,9 @@ SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickho SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); ``` -:::danger "Внимание" - Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +:::danger Внимание +Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
+::: **Смотрите также** diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md index 5c735556c48..4432cdcb538 100644 --- a/docs/zh/engines/database-engines/materialized-mysql.md +++ b/docs/zh/engines/database-engines/materialized-mysql.md @@ -54,8 +54,9 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', - `default_authentication_plugin = mysql_native_password `,因为 `MaterializedMySQL` 只能授权使用该方法。 - `gtid_mode = on`,因为基于GTID的日志记录是提供正确的 `MaterializedMySQL`复制的强制要求。 - :::info "注意" - 当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。 +:::info "注意" +当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。 +::: ## 虚拟列 {#virtual-columns} diff --git a/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md index c6c2888801f..ada004d6558 100644 --- a/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -40,8 +40,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md index e31c40ec04d..23326f5f95a 100644 --- a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -40,8 +40,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 6775662d555..78b9c678eb9 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -124,15 +124,18 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa
已弃用的建表方法 - :::attention "注意" - 不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::attention "注意" +不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: - CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], - ... - ) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` **MergeTree() 参数** diff --git a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md index f5f59b7510f..707016788c4 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md @@ -43,8 +43,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/mergetree-family/sharedmergetree.md b/docs/zh/engines/table-engines/mergetree-family/sharedmergetree.md new file mode 100644 index 00000000000..e97f98b05a4 --- /dev/null +++ b/docs/zh/engines/table-engines/mergetree-family/sharedmergetree.md @@ -0,0 +1,103 @@ +--- +slug: /zh/engines/table-engines/mergetree-family/sharedmergetree +--- +# SharedMergeTree {#sharedmergetree} + + +仅在ClickHouse Cloud(以及第一方合作伙伴云服务)中提供 + +SharedMergeTree表引擎系列是ReplicatedMergeTree引擎的云原生替代方案,经过优化,适用于共享对象存储(例如Amazon S3、Google Cloud Storage、MinIO、Azure Blob Storage)。每个特定的MergeTree引擎类型都有对应的SharedMergeTree引擎,例如ReplacingSharedMergeTree替代ReplacingReplicatedMergeTree。 + +SharedMergeTree表引擎为ClickHouse Cloud的性能带来了显著提升。对于终端用户来说,无需做任何改变即可开始使用SharedMergeTree引擎系列,而不是基于ReplicatedMergeTree的引擎。它提供的好处包括: + +- 更高的插入吞吐量 +- 后台合并的吞吐量提高 +- Mutation操作的吞吐量提高 +- 更快的扩容和缩容操作 +- 用于选择查询的更轻量级强一致性 + +SharedMergeTree带来的一个重要改进是,与ReplicatedMergeTree相比,它提供了更彻底的计算和存储分离。下图展示了ReplicatedMergeTree如何分离计算和存储: + +![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-1.png) + +正如您所见,尽管存储在ReplicatedMergeTree中的数据位于对象存储中,但元数据仍存储在每个clickhouse-server上。这意味着对于每个复制操作,元数据也需要在所有副本上进行复制。 + +![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-2.png) + +与ReplicatedMergeTree不同,SharedMergeTree不需要副本之间进行通信。相反,所有通信都通过共享存储和clickhouse-keeper进行。SharedMergeTree实现了异步无领导复制,并使用clickhouse-keeper进行协调和元数据存储。这意味着随着服务的扩展,不需要复制元数据。这可以加快复制、变更、合并和扩展操作。SharedMergeTree允许每个表有数百个副本,使得无需分片即可进行动态扩展。这也意味着在ClickHouse Cloud中,使用分布式查询执行方法可以利用更多的计算资源来执行查询。 + + +## 系统监控 + +用于系统监控的ReplicatedMergeTree的大部分系统表(system table)在SharedMergeTree中也存在,唯独没有`system.replication_queue`和`system.replicated_fetches`,因为没有数据和元数据的复制。然而,SharedMergeTree对这两个表有相应的替代表。 + +`system.virtual_parts` + +这个表作为SharedMergeTree对 `system.replication_queue` 的替代,存储关于最新的一组data parts,以及未来正在进行的合并、变更和删除parts。 + +`system.shared_merge_tree_fetches` + +这个表是SharedMergeTree对`system.replicated_fetches`的替代。它包含关于正在加载入内存的主键和校验码信息。 + +## 使用SharedMergeTree + +SharedMergeTree已经是所有开发实例(development tier)中的默认表引擎,并且可以通过提交支持工单在生产环境实例(product tier)中启用:https://clickhouse.cloud/support。 + +对于支持SharedMergeTree表引擎的实例,您不需要做任何额外变更。您可以像以前一样创建表,它会自动使用基于SharedMergeTree的表引擎,该引擎与您在CREATE TABLE查询中指定的引擎相对应。 + +通过使用 
SharedMergeTree 表引擎可以创建 my_table 表。 + +```sql +CREATE TABLE my_table( + key UInt64, + value String +) +ENGINE = MergeTree +ORDER BY key +``` + +在ClickHouse Cloud中,由于 `default_table_engine=MergeTree`,用户不必再特别设置`ENGINE=MergeTree`。下面的查询语句和上面的完全一样。 + +```sql + +CREATE TABLE my_table( + key UInt64, + value String +) +ORDER BY key +``` + +如果您使用Replacing、Collapsing、Aggregating、Summing、VersionedCollapsing、Graphite MergeTree表,它们将自动转换为相应的基于SharedMergeTree的表引擎。 + +```sql +CREATE TABLE myFirstReplacingMT +( + `key` Int64, + `someCol` String, + `eventTime` DateTime +) +ENGINE = ReplacingMergeTree +ORDER BY key; +``` + +您可以使用SHOW CREATE TABLE查看用于创建表的语句。 + +``` sql +SHOW CREATE TABLE myFirstReplacingMT; +``` + +```sql +CREATE TABLE default.myFirstReplacingMT +( `key` Int64, `someCol` String, `eventTime` DateTime ) +ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') +ORDER BY key +SETTINGS index_granularity = 8192 +``` + +## 配置 + +一些配置的行为发生了显著的改变: + +- `insert_quorum` -- 所有对SharedMergeTree的insert都是quorum insert(写入共享对象存储),因此在使用SharedMergeTree表引擎时不需要此设置。 +- `insert_quorum_parallel` -- 所有对SharedMergeTree的insert都是quorum insert(写入共享对象存储)。 +- `select_sequential_consistency` -- 不需要quorum inserts,会引起在SELECT查询中向clickhouse-keeper增加附加的请求。 diff --git a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md index f59d327b4ae..849bf7d9ce1 100644 --- a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md @@ -37,15 +37,18 @@ slug: /zh/engines/table-engines/mergetree-family/summingmergetree 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: - CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], - ... - ) ENGINE [=] SummingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [columns]) +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... 
+) ENGINE [=] SummingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [columns]) +``` 除 `columns` 外的所有参数都与 `MergeTree` 中的含义相同。 diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index ebe06977dec..0b129907062 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -54,8 +54,9 @@ VersionedCollapsingMergeTree(sign, version) 不推荐使用的创建表的方法 - :::info "注意" - 不要在新项目中使用此方法。 如果可能,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用此方法。 如果可能,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/special/buffer.md b/docs/zh/engines/table-engines/special/buffer.md index bb95ecdc583..f92a819f3c3 100644 --- a/docs/zh/engines/table-engines/special/buffer.md +++ b/docs/zh/engines/table-engines/special/buffer.md @@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer 缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。 - Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) + Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) 引擎的参数:database,table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上,该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time,max_time,min_rows,max_rows,min_bytes,max_bytes - 从缓冲区刷新数据的条件。 diff --git a/docs/zh/faq/general/why-clickhouse-is-so-fast.md b/docs/zh/faq/general/why-clickhouse-is-so-fast.md index 1962b8b90c2..a7df6aec207 100644 --- a/docs/zh/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/zh/faq/general/why-clickhouse-is-so-fast.md @@ -60,3 +60,4 @@ Last but not least, the ClickHouse team always monitors the Internet on people c - Benchmark on real datasets. - Test for performance regressions in CI. - Measure and observe everything. 
+::: diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 4ce13b0f1d3..00000000000 --- a/docs/zh/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /zh/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -参考: http://dumps.wikimedia.org/other/pagecounts-raw/ - -创建表结构: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -加载数据: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md new file mode 120000 index 00000000000..2d429d00984 --- /dev/null +++ b/docs/zh/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index eedc913cf82..758992e4084 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -346,9 +346,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID - 我们将主键列(UserID, URL)中的一些列值标记为橙色。 - 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。 - - 正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 + 这些橙色标记的列值是每个颗粒中第一行的主键列值。正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 - 我们从0开始对行进行编号,以便与ClickHouse内部行编号方案对齐,该方案也用于记录消息。 ::: @@ -1071,13 +1069,6 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在 ## 通过projections使用联合主键索引 -Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse: - -```sql -SET optimize_use_projections = 1; -``` - - 在原表上创建projection: ```sql ALTER TABLE hits_UserID_URL @@ -1096,10 +1087,12 @@ ALTER TABLE hits_UserID_URL :::note - 该projection正在创建一个隐藏表,该表的行顺序和主索引基于该projection的给定order BY子句 -- 我们使用MATERIALIZE关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 +- `SHOW TABLES` 语句查询是不会列出这个隐藏表的 +- 我们使用`MATERIALIZE`关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 - 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中 - 查询总是(从语法上)针对源表hits_UserID_URL,但是如果隐藏表的行顺序和主索引允许更有效地执行查询,那么将使用该隐藏表 -- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: +- 请注意,投影(projections)不会使 `ORDER BY` 查询语句的效率更高,即使 `ORDER BY` 匹配上了 projection 的 `ORDER BY` 语句(请参阅:https://github.com/ClickHouse/ClickHouse/issues/47333) +- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: @@ -1163,7 +1156,7 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 ``` -## 移除无效的主键列 +## 小结 带有联合主键(UserID, URL)的表的主索引对于加快UserID的查询过滤非常有用。但是,尽管URL列是联合主键的一部分,但该索引在加速URL查询过滤方面并没有提供显著的帮助。 @@ 
-1176,4 +1169,269 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 但是,如果复合主键中的键列在基数上有很大的差异,那么查询按基数升序对主键列进行排序是有益的。 -主键键列之间的基数差越大,主键键列的顺序越重要。我们将在以后的文章中对此进行演示。请继续关注。 +主键键列之间的基数差得越大,主键中的列的顺序越重要。我们将在下一章节对此进行演示。 + +## 高效地为键列排序 + + + + +在复合主键中,键列的顺序会对以下两方面产生重大影响: +- 查询中过滤次关键字列的效率,以及 +- 表数据文件的压缩率。 + +为了演示这一点,我们将使用我们的[网络流量样本数据集(web traffic sample data set)](#数据集)这个版本, +其中每一行包含三列,分别表示互联网用户(`UserID` 列)对 URL(`URL`列)的访问是否被标记为僵尸流量(`IsRobot` 列)。 + +我们将使用一个包含上述所有三列的复合主键,该主键可用于加快计算以下内容的典型网络分析查询速度 +- 特定 URL 有多少(百分比)流量来自机器人,或 +- 我们对特定用户是否为僵尸用户有多大把握(来自该用户的流量中有多大比例被认为是(或不是)僵尸流量) + +我们使用该查询来计算我们要用作复合主键中三个列的基数(注意,我们使用 [URL 表函数](/docs/en/sql-reference/table-functions/url.md) 来即席查询 TSV 数据,而无需创建本地表)。在 `clickhouse client`中运行此查询: +```sql +SELECT + formatReadableQuantity(uniq(URL)) AS cardinality_URL, + formatReadableQuantity(uniq(UserID)) AS cardinality_UserID, + formatReadableQuantity(uniq(IsRobot)) AS cardinality_IsRobot +FROM +( + SELECT + c11::UInt64 AS UserID, + c15::String AS URL, + c20::UInt8 AS IsRobot + FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') + WHERE URL != '' +) +``` +响应如下: +```response +┌─cardinality_URL─┬─cardinality_UserID─┬─cardinality_IsRobot─┐ +│ 2.39 million │ 119.08 thousand │ 4.00 │ +└─────────────────┴────────────────────┴─────────────────────┘ + +1 row in set. Elapsed: 118.334 sec. Processed 8.87 million rows, 15.88 GB (74.99 thousand rows/s., 134.21 MB/s.) +``` + +我们可以看到,各列之间的基数,尤其是 `URL` 列和 `IsRobot` 列之间,存在着很大的差异,因此,在复合主键中,这些列的顺序对于有效加快对这些列的查询过滤速度,以及实现表中列数据文件的最佳压缩比都非常重要。 + +为了证明这一点,我们为僵尸流量分析数据创建了两个版本的表: +- 带有复合主键`(URL、UserID、IsRobot)`的表 `hits_URL_UserID_IsRobot`,其中的键列按基数降序排列 +- 使用复合主键`(IsRobot, UserID, URL)` 创建表 `hits_IsRobot_UserID_URL`,其中的键列按基数升序排列 + + +创建具有复合主键`(URL、UserID、IsRobot)`的表 `hits_URL_UserID_IsRobot`: +```sql +CREATE TABLE hits_URL_UserID_IsRobot +( + `UserID` UInt32, + `URL` String, + `IsRobot` UInt8 +) +ENGINE = MergeTree +// highlight-next-line +PRIMARY KEY (URL, UserID, IsRobot); +``` + +然后,填充887万行数据: +```sql +INSERT INTO hits_URL_UserID_IsRobot SELECT + intHash32(c11::UInt64) AS UserID, + c15 AS URL, + c20 AS IsRobot +FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') +WHERE URL != ''; +``` +响应如下: +```response +0 rows in set. Elapsed: 104.729 sec. Processed 8.87 million rows, 15.88 GB (84.73 thousand rows/s., 151.64 MB/s.) +``` + + +接下来,创建带有复合主键 `(IsRobot,UserID,URL)`的表 `hits_IsRobot_UserID_URL`: +```sql +CREATE TABLE hits_IsRobot_UserID_URL +( + `UserID` UInt32, + `URL` String, + `IsRobot` UInt8 +) +ENGINE = MergeTree +// highlight-next-line +PRIMARY KEY (IsRobot, UserID, URL); +``` +并在其中填入与上一个表相同的 887 万行数据: + +```sql +INSERT INTO hits_IsRobot_UserID_URL SELECT + intHash32(c11::UInt64) AS UserID, + c15 AS URL, + c20 AS IsRobot +FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') +WHERE URL != ''; +``` +响应如下: +```response +0 rows in set. Elapsed: 95.959 sec. Processed 8.87 million rows, 15.88 GB (92.48 thousand rows/s., 165.50 MB/s.) +``` + + + +### 在次关键字列上高效过滤 + +当查询对至少一列进行过滤时,该列是复合关键字的一部分,并且是第一关键字列,[那么 ClickHouse 将在关键字列的索引标记上运行二分查找算法](#主索引被用来选择颗粒)。 + +当查询(仅)过滤属于复合关键字的某一列,但不是第一关键字列时,[ClickHouse 将在关键字列的索引标记上使用通用排除搜索算法](#查询使用第二位主键的性能问题)。 + + +对于第二种情况,复合主键中关键列的排序对[通用排除搜索算法](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444)的有效性很重要。 + +这是一个对表中的 `UserID` 列进行过滤的查询,我们对该表的关键字列`(URL、UserID、IsRobot)`按基数进行了降序排序: +```sql +SELECT count(*) +FROM hits_URL_UserID_IsRobot +WHERE UserID = 112304 +``` +响应如下: +```response +┌─count()─┐ +│ 73 │ +└─────────┘ + +1 row in set. 
Elapsed: 0.026 sec. +// highlight-next-line +Processed 7.92 million rows, +31.67 MB (306.90 million rows/s., 1.23 GB/s.) +``` + +对关键字列`(IsRobot, UserID, URL)`按基数升序排列的表,进行相同的查询: +```sql +SELECT count(*) +FROM hits_IsRobot_UserID_URL +WHERE UserID = 112304 +``` +响应如下: +```response +┌─count()─┐ +│ 73 │ +└─────────┘ + +1 row in set. Elapsed: 0.003 sec. +// highlight-next-line +Processed 20.32 thousand rows, +81.28 KB (6.61 million rows/s., 26.44 MB/s.) +``` + +我们可以看到,在对关键列按基数进行升序排列的表中,查询执行的效率和速度明显更高。 + +其原因是,当通过具有较低基数前键列的次关键字列选择[颗粒](#主索引被用来选择颗粒)时, [通用排除搜索算法](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444)最有效。 我们在本指南的[上一节](#generic-exclusion-search-algorithm)中对此进行了详细说明。 + + +### 数据文件的最佳压缩率 + +此查询将比较上面创建的两个表中 `UserID` 列的压缩率: + +```sql +SELECT + table AS Table, + name AS Column, + formatReadableSize(data_uncompressed_bytes) AS Uncompressed, + formatReadableSize(data_compressed_bytes) AS Compressed, + round(data_uncompressed_bytes / data_compressed_bytes, 0) AS Ratio +FROM system.columns +WHERE (table = 'hits_URL_UserID_IsRobot' OR table = 'hits_IsRobot_UserID_URL') AND (name = 'UserID') +ORDER BY Ratio ASC +``` +这是响应: +```response +┌─Table───────────────────┬─Column─┬─Uncompressed─┬─Compressed─┬─Ratio─┐ +│ hits_URL_UserID_IsRobot │ UserID │ 33.83 MiB │ 11.24 MiB │ 3 │ +│ hits_IsRobot_UserID_URL │ UserID │ 33.83 MiB │ 877.47 KiB │ 39 │ +└─────────────────────────┴────────┴──────────────┴────────────┴───────┘ + +2 rows in set. Elapsed: 0.006 sec. +``` +我们可以看到,在按关键字列`(IsRobot、UserID、URL)` 按基数升序排列的表中,`UserID` 列的压缩率明显更高。 + +虽然两个表中存储的数据完全相同(我们在两个表中插入了相同的 887 万行),但复合主键中关键字列的顺序对表的 [列数据文件](#数据按照主键排序存储在磁盘上)中的 压缩数据所需的磁盘空间有很大影响: +- 在具有复合主键`(URL, UserID, IsRobot)` 的表 `hits_URL_UserID_IsRobot` 中,我们按照键列的基数降序排列,此时 `UserID.bin` 数据文件占用**11.24MB**的磁盘空间。 +- 在具有复合主键`(IsRobot, UserID, URL)` 的表 `hits_IsRobot_UserID_URL` 中,我们按照键列的基数升序排列,`UserID.bin` 数据文件仅占用**877.47 KiB**的磁盘空间。 + +对磁盘上表的列数据进行良好的压缩比不仅能节省磁盘空间,还能使需要从该列读取数据的查询(尤其是分析查询)更快,因为将列数据从磁盘移动到主内存(操作系统的文件缓存)所需的 i/o 更少。 + +下面我们将说明,为什么主键列按基数升序排列有利于提高表列的压缩率。 + +下图阐述了主键的磁盘上行顺序,其中键列是按基数升序排列的: + + +我们讨论过 [表的行数据按主键列有序存储在磁盘上](#数据按照主键排序存储在磁盘上)。 + +在上图中,表格的行(它们在磁盘上的列值)首先按其 `cl` 值排序,具有相同 `cl` 值的行按其 `ch` 值排序。由于第一键列 `cl` 的基数较低,因此很可能存在具有相同 `cl` 值的行。因此,`ch`值也很可能是有序的(局部地--对于具有相同`cl`值的行而言)。 + +如果在一列中,相似的数据被放在彼此相近的位置,例如通过排序,那么这些数据将得到更好的压缩。 +一般来说,压缩算法会受益于数据的运行长度(可见的数据越多,压缩效果越好)和局部性(数据越相似,压缩率越高)。 + +与上图不同的是,下图阐述了主键的磁盘上行顺序,其中主键列是按基数降序排列的: + + +现在,表格的行首先按其 `ch` 值排序,具有相同 `ch` 值的行按其 `cl` 值排序。 +但是,由于第一键列 `ch` 的基数很高,因此不太可能存在具有相同 `ch` 值的行。因此,`cl`值也不太可能是有序的(局部地--对于具有相同`ch`值的行而言)。 + +因此,`cl`值很可能是随机排序的,因此局部性和压缩比都很差。 + + +### 小结 + +为了在查询中有效地过滤次关键字列和提高表列数据文件的压缩率,按基数升序排列主键中的列是有益的。 + + +### 相关内容 +- 博客: [Super charging your ClickHouse queries](https://clickhouse.com/blog/clickhouse-faster-queries-with-projections-and-primary-indexes) + + +## 有效识别单行 + +尽管在一般情况下,它[不](/knowledgebase/key-value)是ClickHouse 的最佳用例, +但是有时建立在ClickHouse之上的应用程序,需要识别ClickHouse表中的单行。 + + +一个直观的解决方案可能是使用[UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) 列,每一行的值都是唯一的,并且为了快速检索行,将该列用作主键列。 + +为了实现最快的检索,UUID 列[需要成为主键列](#主索引被用来选择颗粒)。 + +我们讨论过,由于[ClickHouse 表的行数据是按主键列顺序存储在磁盘上的](#数据按照主键排序存储在磁盘上),因此在主键或复合主键中,在基数较小的列之前设置基数非常大的列(如 UUID 列)[不利于其他表列的压缩率](#数据文件的最佳压缩率)。 + +在最快检索速度和最佳数据压缩之间的折中方法是使用某种复合主键,其中 UUID 是最后一列关键字,位于(更)小基数关键字列之后,这些关键字列用于确保表中某些列的良好压缩比。 + +### 一个具体例子 + +一个具体的例子是 Alexey Milovidov 开发的文本粘贴服务 https://pastila.nl, 相关[博客](https://clickhouse.com/blog/building-a-paste-service-with-clickhouse/)。 + +每次更改文本区域时,数据都会自动保存到 ClickHouse 表格行中(每次更改保存一行)。 + 
+识别和检索(特定版本)粘贴内容的一种方法是使用内容的哈希值作为包含内容的表行的 UUID。 + +下图显示了 +- 当内容发生变化时(例如由于按键将文本键入文本框),行的插入顺序,以及 +- 当使用 `PRIMARY KEY (hash)` 时,插入行数据的磁盘顺序: + + +由于 `hash` 列被用作主键列 +- 可以[非常快速](#主索引被用来选择颗粒) 检索特定行,但 +- 表格的行(列数据)是按照(唯一和随机的)哈希值升序存储在磁盘上的。因此,内容列的值也是按随机顺序存储的,不具有数据局部性,导致**内容列数据文件的压缩率不理想**。 + + +为了大幅提高内容列的压缩率,同时仍能快速检索特定行,pastila.nl 使用两个哈希值(和一个复合主键)来识别特定行: +- 内容哈希值,如上所述,对于不同的数据是不同的,以及 +- 对[局部性敏感的哈希值(fingerprint)](https://en.wikipedia.org/wiki/Locality-sensitive_hashing), 它**不会**因数据的微小变化而变化。 + +下图显示了 +- 当内容发生变化时(例如,由于按键将文本输入文本区),行的插入顺序以及 +- 当使用复合主键`(fingerprint,hash)` 时,插入行数据的磁盘顺序: + + + +现在,磁盘上的行首先按指纹 (`fingerprint`) 排序,对于`fingerprint` 值相同的行,其哈希(`hash`)值决定最终的排序。 + +由于仅有细微差别的数据会获得相同的指纹值,因此类似的数据现在会被存储在磁盘的内容列中,并且彼此靠近。这对内容列的压缩率非常有利,因为压缩算法一般会从数据局部性中获益(数据越相似,压缩率越高)。 + +由此带来的妥协是,检索特定行时需要两个字段("指纹"和 "散列"),以便最佳地利用由复合主键 `(fingerprint, hash)` 产生的主索引。 diff --git a/docs/zh/images/shared-merge-tree-1.png b/docs/zh/images/shared-merge-tree-1.png new file mode 100644 index 00000000000..ef6791e47b9 Binary files /dev/null and b/docs/zh/images/shared-merge-tree-1.png differ diff --git a/docs/zh/images/shared-merge-tree-2.png b/docs/zh/images/shared-merge-tree-2.png new file mode 100644 index 00000000000..be6f9e6f5ed Binary files /dev/null and b/docs/zh/images/shared-merge-tree-2.png differ diff --git a/docs/zh/interfaces/cli.md b/docs/zh/interfaces/cli.md index 80d13154a76..a6b4d10dd2f 100644 --- a/docs/zh/interfaces/cli.md +++ b/docs/zh/interfaces/cli.md @@ -116,7 +116,7 @@ $ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM - `--port` – 连接的端口,默认值:9000。注意HTTP接口以及TCP原生接口使用的是不同端口。 - `--user, -u` – 用户名。 默认值:`default`。 - `--password` – 密码。 默认值:空字符串。 -- `--query, -q` – 使用非交互模式查询。 +- `--query, -q` – 使用非交互模式查询。 允许多次指定 `--query`(`--query "SELECT 1;" --query "SELECT 2;"...`)。 - `--database, -d` – 默认当前操作的数据库. 
默认值:服务端默认的配置(默认是`default`)。 - `--multiline, -m` – 如果指定,允许多行语句查询(Enter仅代表换行,不代表查询语句完结)。 - `--multiquery, -n` – 如果指定, 允许处理用`;`号分隔的多个查询,只在非交互模式下生效。 diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index c7a0f355a92..b1501cbb5d6 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -53,7 +53,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} 1 ``` @@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"} ``` 显示字段信息: @@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/zh/operations/optimizing-performance/profile-guided-optimization.md b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 00000000000..31cb656bd99 --- /dev/null +++ b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index f6106d8734e..a4f6ce73255 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -455,7 +455,7 @@ SSL客户端/服务器配置。 - verificationMode – The method for checking the node’s certificates. Details are in the description of the [A.背景](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) 同学们 可能的值: `none`, `relaxed`, `strict`, `once`. - verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. 可接受的值: `true`, `false`. - sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. 始终建议使用此参数,因为如果服务器缓存会话,以及客户端请求缓存,它有助于避免出现问题。 默认值: `${application.name}`. - sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. @@ -850,10 +850,11 @@ ZooKeeper中数据部分头的存储方法。 如果 `use_minimalistic_part_header_in_zookeeper = 1`,然后 [复制](../../engines/table-engines/mergetree-family/replication.md) 表存储的数据部分的头紧凑使用一个单一的 `znode`. 
如果表包含许多列,则此存储方法显着减少了Zookeeper中存储的数据量。 - :::info "注意" - 申请后 `use_minimalistic_part_header_in_zookeeper = 1`,您不能将ClickHouse服务器降级到不支持此设置的版本。 在集群中的服务器上升级ClickHouse时要小心。 不要一次升级所有服务器。 在测试环境中或在集群的几台服务器上测试ClickHouse的新版本更安全。 +:::info "注意" +申请后 `use_minimalistic_part_header_in_zookeeper = 1`,您不能将ClickHouse服务器降级到不支持此设置的版本。 在集群中的服务器上升级ClickHouse时要小心。 不要一次升级所有服务器。 在测试环境中或在集群的几台服务器上测试ClickHouse的新版本更安全。 - Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. +Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. +::: **默认值:** 0. diff --git a/docs/zh/operations/system-tables/licenses.md b/docs/zh/operations/system-tables/licenses.md index dc09e65264d..38260491dc0 100644 --- a/docs/zh/operations/system-tables/licenses.md +++ b/docs/zh/operations/system-tables/licenses.md @@ -20,21 +20,9 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` text ┌─library_name───────┬─license_type─┬─license_path────────────────────────┐ -│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │ -│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │ -│ avro │ Apache │ /contrib/avro/LICENSE.txt │ │ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │ -│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │ -│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │ -│ aws │ Apache │ /contrib/aws/LICENSE.txt │ -│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │ -│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │ +│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │ │ brotli │ MIT │ /contrib/brotli/LICENSE │ -│ capnproto │ MIT │ /contrib/capnproto/LICENSE │ -│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │ -│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │ -│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │ -│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │ +│ [...] │ [...] │ [...] 
│ └────────────────────┴──────────────┴─────────────────────────────────────┘ - ``` diff --git a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 8b07c3e67bd..b30cc704100 100644 --- a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -213,8 +213,9 @@ ClickHouse接收来自ODBC-driver的引用符号,并将查询中的所有设 ### ODBC字典功能的已知漏洞 {#known-vulnerability-of-the-odbc-dictionary-functionality} - :::info "注意" - 通过ODBC驱动程序连接参数连接到数据库时 `Servername` 可以取代。 在这种情况下,值 `USERNAME` 和 `PASSWORD` 从 `odbc.ini` 被发送到远程服务器,并且可能会受到损害。 +:::info "注意" +通过ODBC驱动程序连接参数连接到数据库时 `Servername` 可以取代。 在这种情况下,值 `USERNAME` 和 `PASSWORD` 从 `odbc.ini` 被发送到远程服务器,并且可能会受到损害。 +::: **不安全使用示例** diff --git a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 1396de63391..b755d45a731 100644 --- a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -48,8 +48,9 @@ ClickHouse: [字典的DDL查询](../../statements/create.md#create-dictionary-query) 在服务器配置中不需要任何其他记录。 它们允许使用字典作为一流的实体,如表或视图。 - :::info "注意" - 您可以通过在一个小字典中描述它来转换小字典的值 `SELECT` 查询(见 [变换](../../../sql-reference/functions/other-functions.md) 功能)。 此功能与外部字典无关。 +:::info "注意" +您可以通过在一个小字典中描述它来转换小字典的值 `SELECT` 查询(见 [变换](../../../sql-reference/functions/other-functions.md) 功能)。 此功能与外部字典无关。 +::: ## 另请参阅 {#ext-dicts-see-also} diff --git a/docs/zh/sql-reference/functions/arithmetic-functions.md b/docs/zh/sql-reference/functions/arithmetic-functions.md index b0a37565c16..49d800fd069 100644 --- a/docs/zh/sql-reference/functions/arithmetic-functions.md +++ b/docs/zh/sql-reference/functions/arithmetic-functions.md @@ -125,7 +125,7 @@ SELECT max2(-1, 2); **语法** ```sql -max2(value1, value2) +min2(value1, value2) ``` **参数** diff --git a/docs/zh/sql-reference/operators/in.md b/docs/zh/sql-reference/operators/in.md index df4c8772e86..e030b8f1820 100644 --- a/docs/zh/sql-reference/operators/in.md +++ b/docs/zh/sql-reference/operators/in.md @@ -107,8 +107,9 @@ FROM t_null 带子查询的IN-s有两个选项(类似于连接):normal `IN` / `JOIN` 和 `GLOBAL IN` / `GLOBAL JOIN`. 它们在分布式查询处理的运行方式上有所不同。 - :::info "注意" - 请记住,下面描述的算法可能会有不同的工作方式取决于 [设置](../../operations/settings/settings.md) `distributed_product_mode` 设置。 +:::info "注意" +请记住,下面描述的算法可能会有不同的工作方式取决于 [设置](../../operations/settings/settings.md) `distributed_product_mode` 设置。 +::: 当使用常规IN时,查询被发送到远程服务器,并且它们中的每个服务器都在运行子查询 `IN` 或 `JOIN` 条款 diff --git a/docs/zh/sql-reference/statements/alter/index/index.md b/docs/zh/sql-reference/statements/alter/index/index.md index 85038171eb5..568d5d08756 100644 --- a/docs/zh/sql-reference/statements/alter/index/index.md +++ b/docs/zh/sql-reference/statements/alter/index/index.md @@ -21,5 +21,5 @@ Also, they are replicated, syncing indices metadata via ZooKeeper. 此外,它们会被复制,会通过ZooKeeper同步索引元数据。 :::note "注意" - 索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../../engines/table-engines/mergetree-family/replication.md)). +索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../../engines/table-engines/mergetree-family/replication.md)). 
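The arithmetic-functions hunk above corrects the syntax block of `min2` (it previously repeated `max2`); a one-line check of both functions with arbitrary arguments:

```bash
clickhouse-client --query "SELECT min2(-1, 2), max2(-1, 2)"   # returns -1 and 2
```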
::: diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 9acc1655f9a..f80c0a8a8ea 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -8,7 +8,7 @@ INSERT INTO 语句主要用于向系统中添加数据. 查询的基本格式: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` 您可以在查询中指定要插入的列的列表,如:`[(c1, c2, c3)]`。您还可以使用列[匹配器](../../sql-reference/statements/select/index.md#asterisk)的表达式,例如`*`和/或[修饰符](../../sql-reference/statements/select/index.md#select-modifiers),例如 [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier)。 @@ -71,7 +71,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set 例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... ``` ClickHouse会清除数据前所有的空白字符与一个换行符(如果有换行符的话)。所以在进行查询时,我们建议您将数据放入到输入输出格式名称后的新的一行中去(如果数据是以空白字符开始的,这将非常重要)。 @@ -93,7 +93,7 @@ INSERT INTO t FORMAT TabSeparated ### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` 写入与SELECT的列的对应关系是使用位置来进行对应的,尽管它们在SELECT表达式与INSERT中的名称可能是不同的。如果需要,会对它们执行对应的类型转换。 diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml index e9c2e929755..3b77d968763 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -32,6 +32,12 @@ contents: dst: /usr/bin/clickhouse-keeper - src: clickhouse-keeper.service dst: /lib/systemd/system/clickhouse-keeper.service +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-client + type: symlink +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-converter + type: symlink # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-keeper/AUTHORS diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 018e88ef828..66299fddd4a 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -55,6 +55,9 @@ contents: - src: clickhouse dst: /usr/bin/clickhouse-keeper type: symlink +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-client + type: symlink - src: root/usr/bin/clickhouse-report dst: /usr/bin/clickhouse-report - src: root/usr/bin/clickhouse-server diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 466a0c194f7..a5564f47784 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -1,12 +1,11 @@ #include #include -#include #include -#include #include #include #include #include +#include #include #include #include @@ -18,9 +17,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -38,8 +35,6 @@ #include -namespace fs = std::filesystem; - /** A tool for evaluating ClickHouse performance. * The tool emulates a case with fixed amount of simultaneously executing queries. 
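The insert-into.md hunks above document the optional `TABLE` keyword; the forms below are equivalent (table name and values are illustrative).

```bash
clickhouse-client --query "CREATE TABLE IF NOT EXISTS t_demo (x UInt32, s String) ENGINE = Memory"
clickhouse-client --query "INSERT INTO TABLE t_demo (x, s) VALUES (1, 'a'), (2, 'b')"
clickhouse-client --query "INSERT INTO t_demo (x, s) SELECT number, toString(number) FROM numbers(3)"
```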
*/ @@ -54,6 +49,7 @@ namespace DB { using Ports = std::vector; +static constexpr std::string_view DEFAULT_CLIENT_NAME = "benchmark"; namespace ErrorCodes { @@ -79,7 +75,6 @@ public: bool randomize_, size_t max_iterations_, double max_time_, - const String & json_path_, size_t confidence_, const String & query_id_, const String & query_to_execute_, @@ -98,7 +93,6 @@ public: cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), - json_path(json_path_), confidence(confidence_), query_id(query_id_), query_to_execute(query_to_execute_), @@ -130,7 +124,7 @@ public: default_database_, user_, password_, quota_key_, /* cluster_= */ "", /* cluster_secret_= */ "", - /* client_name_= */ "benchmark", + /* client_name_= */ std::string(DEFAULT_CLIENT_NAME), Protocol::Compression::Enable, secure)); @@ -143,6 +137,8 @@ public: global_context->makeGlobalContext(); global_context->setSettings(settings); + global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); + global_context->setQueryKindInitial(); std::cerr << std::fixed << std::setprecision(3); @@ -165,9 +161,6 @@ public: int main(const std::vector &) override { - if (!json_path.empty() && fs::exists(json_path)) /// Clear file with previous results - fs::remove(json_path); - readQueries(); runBenchmark(); return 0; @@ -197,7 +190,6 @@ private: bool cumulative; size_t max_iterations; double max_time; - String json_path; size_t confidence; String query_id; String query_to_execute; @@ -226,26 +218,23 @@ private: size_t read_bytes = 0; size_t result_rows = 0; size_t result_bytes = 0; - double work_time = 0; using Sampler = ReservoirSampler; Sampler sampler {1 << 16}; - void add(double seconds, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc) + void add(double duration, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc) { ++queries; - work_time += seconds; read_rows += read_rows_inc; read_bytes += read_bytes_inc; result_rows += result_rows_inc; result_bytes += result_bytes_inc; - sampler.insert(seconds); + sampler.insert(duration); } void clear() { queries = 0; - work_time = 0; read_rows = 0; read_bytes = 0; result_rows = 0; @@ -331,10 +320,13 @@ private: return false; } - if (delay > 0 && delay_watch.elapsedSeconds() > delay) + double seconds = delay_watch.elapsedSeconds(); + if (delay > 0 && seconds > delay) { printNumberOfQueriesExecuted(queries_executed); - cumulative ? report(comparison_info_total) : report(comparison_info_per_interval); + cumulative + ? report(comparison_info_total, total_watch.elapsedSeconds()) + : report(comparison_info_per_interval, seconds); delay_watch.restart(); } } @@ -350,16 +342,7 @@ private: try { for (size_t i = 0; i < concurrency; ++i) - { - EntryPtrs connection_entries; - connection_entries.reserve(connections.size()); - - for (const auto & connection : connections) - connection_entries.emplace_back(std::make_shared( - connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); - - pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); }); - } + pool.scheduleOrThrowOnError([this]() mutable { thread(); }); } catch (...) 
{ @@ -389,21 +372,18 @@ private: pool.wait(); total_watch.stop(); - if (!json_path.empty()) - reportJSON(comparison_info_total, json_path); - printNumberOfQueriesExecuted(queries_executed); - report(comparison_info_total); + report(comparison_info_total, total_watch.elapsedSeconds()); } - void thread(EntryPtrs & connection_entries) + void thread() { Query query; /// Randomly choosing connection index pcg64 generator(randomSeed()); - std::uniform_int_distribution distribution(0, connection_entries.size() - 1); + std::uniform_int_distribution distribution(0, connections.size() - 1); /// In these threads we do not accept INT signal. sigset_t sig_set; @@ -423,15 +403,13 @@ private: extracted = queue.tryPop(query, 100); if (shutdown || (max_iterations && queries_executed == max_iterations)) - { return; - } } const auto connection_index = distribution(generator); try { - execute(connection_entries, query, connection_index); + execute(query, connection_index); consecutive_errors = 0; } catch (...) @@ -460,17 +438,18 @@ private: } } - void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) + void execute(Query & query, size_t connection_index) { Stopwatch watch; - Connection & connection = **connection_entries[connection_index]; + ConnectionPool::Entry entry = connections[connection_index]->get( + ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)); if (reconnect) - connection.disconnect(); + entry->disconnect(); RemoteQueryExecutor executor( - connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + *entry, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); if (!query_id.empty()) executor.setQueryId(query_id); @@ -485,19 +464,19 @@ private: executor.finish(); - double seconds = (display_client_side_time || progress.elapsed_ns == 0) + double duration = (display_client_side_time || progress.elapsed_ns == 0) ? watch.elapsedSeconds() : progress.elapsed_ns / 1e9; std::lock_guard lock(mutex); size_t info_index = round_robin ? 
0 : connection_index; - comparison_info_per_interval[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); - comparison_info_total[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); - t_test.add(info_index, seconds); + comparison_info_per_interval[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + comparison_info_total[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + t_test.add(info_index, duration); } - void report(MultiStats & infos) + void report(MultiStats & infos, double seconds) { std::lock_guard lock(mutex); @@ -510,8 +489,6 @@ private: if (0 == info->queries) return; - double seconds = info->work_time / concurrency; - std::string connection_description = connections[i]->getDescription(); if (round_robin) { @@ -525,10 +502,10 @@ private: } std::cerr << connection_description << ", " - << "queries " << info->queries << ", "; + << "queries: " << info->queries << ", "; if (info->errors) { - std::cerr << "errors " << info->errors << ", "; + std::cerr << "errors: " << info->errors << ", "; } std::cerr << "QPS: " << (info->queries / seconds) << ", " @@ -567,62 +544,6 @@ private: } } - void reportJSON(MultiStats & infos, const std::string & filename) - { - WriteBufferFromFile json_out(filename); - - std::lock_guard lock(mutex); - - auto print_key_value = [&](auto key, auto value, bool with_comma = true) - { - json_out << double_quote << key << ": " << value << (with_comma ? ",\n" : "\n"); - }; - - auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true) - { - json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n"); - }; - - json_out << "{\n"; - - for (size_t i = 0; i < infos.size(); ++i) - { - const auto & info = infos[i]; - - json_out << double_quote << connections[i]->getDescription() << ": {\n"; - json_out << double_quote << "statistics" << ": {\n"; - - double seconds = info->work_time / concurrency; - - print_key_value("QPS", info->queries.load() / seconds); - print_key_value("RPS", info->read_rows / seconds); - print_key_value("MiBPS", info->read_bytes / seconds / 1048576); - print_key_value("RPS_result", info->result_rows / seconds); - print_key_value("MiBPS_result", info->result_bytes / seconds / 1048576); - print_key_value("num_queries", info->queries.load()); - print_key_value("num_errors", info->errors, false); - - json_out << "},\n"; - json_out << double_quote << "query_time_percentiles" << ": {\n"; - - if (info->queries != 0) - { - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(*info, percent); - - print_percentile(*info, 95); - print_percentile(*info, 99); - print_percentile(*info, 99.9); - print_percentile(*info, 99.99, false); - } - - json_out << "}\n"; - json_out << (i == infos.size() - 1 ? 
"}\n" : "},\n"); - } - - json_out << "}\n"; - } - public: ~Benchmark() override @@ -675,7 +596,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("iterations,i", value()->default_value(0), "amount of queries to be executed") ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") ("randomize,r", "randomize order of execution") - ("json", value()->default_value(""), "write final report to specified file in JSON format") ("host,h", value()->multitoken(), "list of hosts") ("port", value()->multitoken(), "list of ports") ("roundrobin", "Instead of comparing queries for different --host/--port just pick one random --host/--port for every query and send query to it.") @@ -739,7 +659,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options.count("randomize"), options["iterations"].as(), options["timelimit"].as(), - options["json"].as(), options["confidence"].as(), options["query_id"].as(), options["query"].as(), diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index e1a33231592..64823f9ec7f 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -812,6 +812,11 @@ bool Client::processWithFuzzing(const String & full_query) } catch (...) { + if (!ast_to_process) + fmt::print(stderr, + "Error while forming new query: {}\n", + getCurrentExceptionMessage(true)); + // Some functions (e.g. protocol parsers) don't throw, but // set last_exception instead, so we'll also do it here for // uniformity. @@ -1184,7 +1189,7 @@ void Client::processOptions(const OptionsDescription & options_description, void Client::processConfig() { - if (config().has("query") && config().has("queries-file")) + if (!queries.empty() && config().has("queries-file")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time"); /// Batch mode is enabled if one of the following is true: @@ -1195,9 +1200,9 @@ void Client::processConfig() /// - --queries-file command line option is present. /// The value of the option is used as file with query (or of multiple queries) to execute. - delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file")); + delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file")); if (stdin_is_a_tty - && (delayed_interactive || (!config().has("query") && queries_files.empty()))) + && (delayed_interactive || (queries.empty() && queries_files.empty()))) { is_interactive = true; } @@ -1238,6 +1243,7 @@ void Client::processConfig() global_context->getSettingsRef().max_insert_block_size); } + global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); global_context->setQueryKindInitial(); global_context->setQuotaClientKey(config().getString("quota_key", "")); global_context->setQueryKind(query_kind); diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index efe7121cace..556eca808f6 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -608,6 +608,8 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") << partition_name; UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD); + if (num_shards_drop_partition != task_table.cluster_push->getShardCount()) + return TaskStatus::Error; LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}", partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount()); diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index 64071423b8e..8f24d13d379 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -89,7 +89,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) .argument("task-path").binding("task-path")); options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path") .argument("task-file").binding("task-file")); - options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists") + options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. Default is false.") .argument("task-upload-force").binding("task-upload-force")); options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors") .binding("safe-mode")); diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 5228b582d25..4a7af1ced29 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -17,23 +17,21 @@ public: { command_name = "copy"; command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Recursively copy data containing at `from_path` to `to_path`\nPath should be in format './' or './path' or 'path'"; + description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; usage = "copy [OPTION]... 
"; command_option_description->add_options() - ("diskFrom", po::value(), "set name for disk from which we do operations") - ("diskTo", po::value(), "set name for disk to which we do operations") - ; - + ("disk-from", po::value(), "disk from which we copy") + ("disk-to", po::value(), "disk to which we copy"); } void processOptions( Poco::Util::LayeredConfiguration & config, po::variables_map & options) const override { - if (options.count("diskFrom")) - config.setString("diskFrom", options["diskFrom"].as()); - if (options.count("diskTo")) - config.setString("diskTo", options["diskTo"].as()); + if (options.count("disk-from")) + config.setString("disk-from", options["disk-from"].as()); + if (options.count("disk-to")) + config.setString("disk-to", options["disk-to"].as()); } void execute( @@ -47,8 +45,8 @@ public: throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } - String disk_name_from = config.getString("diskFrom", config.getString("disk", "default")); - String disk_name_to = config.getString("diskTo", config.getString("disk", "default")); + String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); + String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; @@ -59,7 +57,7 @@ public: String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to); + disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* settings= */ {}); } }; } diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 0e94eb87c04..357832865fb 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -15,7 +15,7 @@ public: CommandLink() { command_name = "link"; - description = "Create hardlink from `from_path` to `to_path`\nPath should be in format './' or './path' or 'path'"; + description = "Create hardlink from `from_path` to `to_path`"; usage = "link [OPTION]... "; } diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 470784bff00..48b54b70014 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -17,11 +17,10 @@ public: { command_name = "list"; command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "List files (the default disk is used by default)\nPath should be in format './' or './path' or 'path'"; + description = "List files at path[s]"; usage = "list [OPTION]... ..."; command_option_description->add_options() - ("recursive", "recursively list all directories") - ; + ("recursive", "recursively list all directories"); } void processOptions( diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index c938cc52132..e5df982d896 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -18,11 +18,10 @@ public: { command_name = "mkdir"; command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Create directory or directories recursively"; + description = "Create a directory"; usage = "mkdir [OPTION]... 
"; command_option_description->add_options() - ("recursive", "recursively create directories") - ; + ("recursive", "recursively create directories"); } void processOptions( diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 3c564f3bcd3..654090b2138 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -15,7 +15,7 @@ public: CommandMove() { command_name = "move"; - description = "Move file or directory from `from_path` to `to_path`\nPath should be in format './' or './path' or 'path'"; + description = "Move file or directory from `from_path` to `to_path`"; usage = "move [OPTION]... "; } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 2dd5c191d10..b6cacdd2c61 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -20,11 +20,10 @@ public: { command_name = "read"; command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "read File `from_path` to `to_path` or to stdout\nPath should be in format './' or './path' or 'path'"; - usage = "read [OPTION]... \nor\nread [OPTION]... "; + description = "Read a file from `FROM_PATH` to `TO_PATH`"; + usage = "read [OPTION]... []"; command_option_description->add_options() - ("output", po::value(), "set path to file to which we are read") - ; + ("output", po::value(), "file to which we are reading, defaults to `stdout`"); } void processOptions( diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index b055c6f9343..d075daf3215 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -21,11 +21,10 @@ public: { command_name = "write"; command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Write File `from_path` or stdin to `to_path`"; - usage = "write [OPTION]... \nor\nstdin | write [OPTION]... \nPath should be in format './' or './path' or 'path'"; + description = "Write a file from `FROM_PATH` to `TO_PATH`"; + usage = "write [OPTION]... [] "; command_option_description->add_options() - ("input", po::value(), "set path to file to which we are write") - ; + ("input", po::value(), "file from which we are reading, defaults to `stdin`"); } void processOptions( diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 52d1a2196a9..86188fb6db1 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,4 +1,6 @@ #include "ICommand.h" +#include + namespace DB { diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 43c66a32302..d7d61bbcd3b 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -163,13 +163,15 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); - /// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail, - /// should throw exception early and make exception message more readable. + + /// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure, + /// we should throw an exception early, and make exception message more readable. 
if (const auto * insert_query = res->as(); insert_query && insert_query->data) { throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA, "Can't format ASTInsertQuery with data, since data will be lost"); } + if (!quiet) { if (!backslash) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d83e189f7ef..e10a9fea86b 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,10 +20,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -35,6 +32,14 @@ #include +#include + +#include "config.h" + +/// Embedded configuration files used inside the install program +INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml"); +INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); + /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. * It also allows to avoid dependency on systemd, upstart, SysV init. @@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(main_config_file)) { - std::string_view main_config_content = getResource("config.xml"); + std::string_view main_config_content(reinterpret_cast(gresource_config_xmlData), gresource_config_xmlSize); if (main_config_content.empty()) { fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); @@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(users_config_file)) { - std::string_view users_config_content = getResource("users.xml"); + std::string_view users_config_content(reinterpret_cast(gresource_users_xmlData), gresource_users_xmlSize); if (users_config_content.empty()) { fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); @@ -992,7 +997,9 @@ namespace { /// sudo respects limits in /etc/security/limits.conf e.g. open files, /// that's why we are using it instead of the 'clickhouse su' tool. 
- command = fmt::format("sudo -u '{}' {}", user, command); + /// by default, sudo resets all the ENV variables, but we should preserve + /// the values /etc/default/clickhouse in /etc/init.d/clickhouse file + command = fmt::format("sudo --preserve-env -u '{}' {}", user, command); } fmt::print("Will run {}\n", command); diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 05928a0d20b..757f90fadcf 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -1,5 +1,6 @@ #include "Commands.h" +#include #include "KeeperClient.h" @@ -8,11 +9,11 @@ namespace DB bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { - String arg; - if (!parseKeeperPath(pos, expected, arg)) + String path; + if (!parseKeeperPath(pos, expected, path)) return true; - node->args.push_back(std::move(arg)); + node->args.push_back(std::move(path)); return true; } @@ -24,18 +25,28 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con else path = client->cwd; - for (const auto & child : client->zookeeper->getChildren(path)) - std::cout << child << " "; + auto children = client->zookeeper->getChildren(path); + std::sort(children.begin(), children.end()); + + bool need_space = false; + for (const auto & child : children) + { + if (std::exchange(need_space, true)) + std::cout << " "; + + std::cout << child; + } + std::cout << "\n"; } bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { - String arg; - if (!parseKeeperPath(pos, expected, arg)) + String path; + if (!parseKeeperPath(pos, expected, path)) return true; - node->args.push_back(std::move(arg)); + node->args.push_back(std::move(path)); return true; } @@ -53,11 +64,12 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { - String arg; - if (!parseKeeperPath(pos, expected, arg)) + String path; + if (!parseKeeperPath(pos, expected, path)) return false; - node->args.push_back(std::move(arg)); + node->args.push_back(std::move(path)); + String arg; if (!parseKeeperArg(pos, expected, arg)) return false; node->args.push_back(std::move(arg)); @@ -77,16 +89,17 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co client->zookeeper->set( client->getAbsolutePath(query->args[0].safeGet()), query->args[1].safeGet(), - static_cast(query->args[2].safeGet())); + static_cast(query->args[2].get())); } bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { - String arg; - if (!parseKeeperPath(pos, expected, arg)) + String path; + if (!parseKeeperPath(pos, expected, path)) return false; - node->args.push_back(std::move(arg)); + node->args.push_back(std::move(path)); + String arg; if (!parseKeeperArg(pos, expected, arg)) return false; node->args.push_back(std::move(arg)); @@ -115,7 +128,7 @@ void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) static_cast(query->args[2].safeGet())); } -bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +bool TouchCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String arg; if (!parseKeeperPath(pos, expected, arg)) @@ -125,17 +138,214 @@ bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & nod return true; } +void TouchCommand::execute(const ASTKeeperQuery * 
query, KeeperClient * client) const +{ + client->zookeeper->createIfNotExists(client->getAbsolutePath(query->args[0].safeGet()), ""); +} + +bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + return false; + node->args.push_back(std::move(path)); + + return true; +} + void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const { std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; } +bool ExistsCommand::parse(IParser::Pos & pos, std::shared_ptr & node, DB::Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + return false; + node->args.push_back(std::move(path)); + + return true; +} + +void ExistsCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const +{ + std::cout << client->zookeeper->exists(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; +} + +bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + return true; + + node->args.push_back(std::move(path)); + return true; +} + +void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + Coordination::Stat stat; + String path; + if (!query->args.empty()) + path = client->getAbsolutePath(query->args[0].safeGet()); + else + path = client->cwd; + + client->zookeeper->get(path, &stat); + + std::cout << "cZxid = " << stat.czxid << "\n"; + std::cout << "mZxid = " << stat.mzxid << "\n"; + std::cout << "pZxid = " << stat.pzxid << "\n"; + std::cout << "ctime = " << stat.ctime << "\n"; + std::cout << "mtime = " << stat.mtime << "\n"; + std::cout << "version = " << stat.version << "\n"; + std::cout << "cversion = " << stat.cversion << "\n"; + std::cout << "aversion = " << stat.aversion << "\n"; + std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n"; + std::cout << "dataLength = " << stat.dataLength << "\n"; + std::cout << "numChildren = " << stat.numChildren << "\n"; +} + +bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + ASTPtr threshold; + if (!ParserUnsignedInteger{}.parse(pos, threshold, expected)) + return false; + + node->args.push_back(threshold->as().value); + + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + return true; +} + +void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto threshold = query->args[0].safeGet(); + auto path = client->getAbsolutePath(query->args[1].safeGet()); + + Coordination::Stat stat; + client->zookeeper->get(path, &stat); + + if (stat.numChildren >= static_cast(threshold)) + { + std::cout << static_cast(path) << "\t" << stat.numChildren << "\n"; + return; + } + + auto children = client->zookeeper->getChildren(path); + std::sort(children.begin(), children.end()); + for (const auto & child : children) + { + auto next_query = *query; + next_query.args[1] = DB::Field(path / child); + execute(&next_query, client); + } +} + +bool DeleteStaleBackups::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const +{ + return true; +} + +void DeleteStaleBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const +{ + client->askConfirmation( + "You are going to delete all inactive backups in /clickhouse/backups.", 
+ [client] + { + fs::path backup_root = "/clickhouse/backups"; + auto backups = client->zookeeper->getChildren(backup_root); + std::sort(backups.begin(), backups.end()); + + for (const auto & child : backups) + { + auto backup_path = backup_root / child; + std::cout << "Found backup " << backup_path << ", checking if it's active\n"; + + String stage_path = backup_path / "stage"; + auto stages = client->zookeeper->getChildren(stage_path); + + bool is_active = false; + for (const auto & stage : stages) + { + if (startsWith(stage, "alive")) + { + is_active = true; + break; + } + } + + if (is_active) + { + std::cout << "Backup " << backup_path << " is active, not going to delete\n"; + continue; + } + + std::cout << "Backup " << backup_path << " is not active, deleting it\n"; + client->zookeeper->removeRecursive(backup_path); + } + }); +} + +bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + + ASTPtr count; + if (ParserUnsignedInteger{}.parse(pos, count, expected)) + node->args.push_back(count->as().value); + else + node->args.push_back(UInt64(10)); + + return true; +} + +void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto path = client->getAbsolutePath(query->args[0].safeGet()); + auto n = query->args[1].safeGet(); + + std::vector> result; + + std::queue queue; + queue.push(path); + while (!queue.empty()) + { + auto next_path = queue.front(); + queue.pop(); + + auto children = client->zookeeper->getChildren(next_path); + std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; }); + + auto response = client->zookeeper->get(children); + + for (size_t i = 0; i < response.size(); ++i) + { + result.emplace_back(response[i].stat.numChildren, children[i]); + queue.push(children[i]); + } + } + + std::sort(result.begin(), result.end(), std::greater()); + for (UInt64 i = 0; i < std::min(result.size(), static_cast(n)); ++i) + std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n"; +} + bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { - String arg; - if (!parseKeeperPath(pos, expected, arg)) + String path; + if (!parseKeeperPath(pos, expected, path)) return false; - node->args.push_back(std::move(arg)); + node->args.push_back(std::move(path)); return true; } @@ -147,10 +357,10 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { - String arg; - if (!parseKeeperPath(pos, expected, arg)) + String path; + if (!parseKeeperPath(pos, expected, path)) return false; - node->args.push_back(std::move(arg)); + node->args.push_back(std::move(path)); return true; } @@ -162,6 +372,70 @@ void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co [client, path]{ client->zookeeper->removeRecursive(path); }); } +bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr & node, DB::Expected & expected) const +{ + ReconfigCommand::Operation operation; + if (ParserKeyword{"ADD"}.ignore(pos, expected)) + operation = ReconfigCommand::Operation::ADD; + else if (ParserKeyword{"REMOVE"}.ignore(pos, expected)) + operation = ReconfigCommand::Operation::REMOVE; + else if (ParserKeyword{"SET"}.ignore(pos, expected)) + operation = 
ReconfigCommand::Operation::SET; + else + return false; + + node->args.push_back(operation); + ParserToken{TokenType::Whitespace}.ignore(pos); + + String arg; + if (!parseKeeperArg(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + return true; +} + +void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const +{ + String joining; + String leaving; + String new_members; + + auto operation = query->args[0].get(); + switch (operation) + { + case static_cast(ReconfigCommand::Operation::ADD): + joining = query->args[1].safeGet(); + break; + case static_cast(ReconfigCommand::Operation::REMOVE): + leaving = query->args[1].safeGet(); + break; + case static_cast(ReconfigCommand::Operation::SET): + new_members = query->args[1].safeGet(); + break; + default: + UNREACHABLE(); + } + + auto response = client->zookeeper->reconfig(joining, leaving, new_members); + std::cout << response.value << '\n'; +} + +bool SyncCommand::parse(IParser::Pos & pos, std::shared_ptr & node, DB::Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + return false; + node->args.push_back(std::move(path)); + + return true; +} + +void SyncCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const +{ + std::cout << client->zookeeper->sync(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; +} + bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const { return true; @@ -170,7 +444,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptrgetHelpMessage() << "\n"; + std::cout << pair.second->generateHelpString() << "\n"; } bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const diff --git a/programs/keeper-client/Commands.h b/programs/keeper-client/Commands.h index e4debd53e42..f9d4292584b 100644 --- a/programs/keeper-client/Commands.h +++ b/programs/keeper-client/Commands.h @@ -21,6 +21,12 @@ public: virtual String getName() const = 0; virtual ~IKeeperClientCommand() = default; + + String generateHelpString() const + { + return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName())); + } + }; using Command = std::shared_ptr; @@ -34,7 +40,7 @@ class LSCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; } + String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; } }; class CDCommand : public IKeeperClientCommand @@ -45,7 +51,7 @@ class CDCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; } + String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; } }; class SetCommand : public IKeeperClientCommand @@ -58,7 +64,7 @@ class SetCommand : public IKeeperClientCommand String getHelpMessage() const override { - return "set [version] -- Updates the node's value. Only update if version matches (default: -1)"; + return "{} [version] -- Updates the node's value. 
Only update if version matches (default: -1)"; } }; @@ -70,7 +76,18 @@ class CreateCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "create -- Creates new node"; } + String getHelpMessage() const override { return "{} [mode] -- Creates new node with the set value"; } +}; + +class TouchCommand : public IKeeperClientCommand +{ + String getName() const override { return "touch"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists"; } }; class GetCommand : public IKeeperClientCommand @@ -81,9 +98,74 @@ class GetCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "get -- Returns the node's value"; } + String getHelpMessage() const override { return "{} -- Returns the node's value"; } }; +class ExistsCommand : public IKeeperClientCommand +{ + String getName() const override { return "exists"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} -- Returns `1` if node exists, `0` otherwise"; } +}; + +class GetStatCommand : public IKeeperClientCommand +{ + String getName() const override { return "get_stat"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; } +}; + +class FindSuperNodes : public IKeeperClientCommand +{ + String getName() const override { return "find_super_nodes"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)"; + } +}; + +class DeleteStaleBackups : public IKeeperClientCommand +{ + String getName() const override { return "delete_stale_backups"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} -- Deletes ClickHouse nodes used for backups that are now inactive"; + } +}; + +class FindBigFamily : public IKeeperClientCommand +{ + String getName() const override { return "find_big_family"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)"; + } +}; + + class RMCommand : public IKeeperClientCommand { String getName() const override { return "rm"; } @@ -92,7 
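A few of the new keeper-client commands whose help strings are listed above, run non-interactively via `--query`; the paths and the threshold are illustrative.

```bash
clickhouse-keeper-client --query "exists /clickhouse"
clickhouse-keeper-client --query "get_stat /clickhouse"
clickhouse-keeper-client --query "find_super_nodes 1000 /clickhouse"
clickhouse-keeper-client --query "find_big_family /clickhouse 10"
clickhouse-keeper-client --query "touch /clickhouse/some_node"
```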
+174,7 @@ class RMCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "remove -- Remove the node"; } + String getHelpMessage() const override { return "{} -- Remove the node"; } }; class RMRCommand : public IKeeperClientCommand @@ -103,7 +185,36 @@ class RMRCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "rmr -- Recursively deletes path. Confirmation required"; } + String getHelpMessage() const override { return "{} -- Recursively deletes path. Confirmation required"; } +}; + +class ReconfigCommand : public IKeeperClientCommand +{ + enum class Operation : UInt8 + { + ADD = 0, + REMOVE = 1, + SET = 2, + }; + + String getName() const override { return "reconfig"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} \"\" [version] -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration"; } +}; + +class SyncCommand: public IKeeperClientCommand +{ + String getName() const override { return "sync"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} -- Synchronizes node between processes and leader"; } }; class HelpCommand : public IKeeperClientCommand @@ -114,7 +225,7 @@ class HelpCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "help -- Prints this message"; } + String getHelpMessage() const override { return "{} -- Prints this message"; } }; class FourLetterWordCommand : public IKeeperClientCommand @@ -125,7 +236,7 @@ class FourLetterWordCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "flwc -- Executes four-letter-word command"; } + String getHelpMessage() const override { return "{} -- Executes four-letter-word command"; } }; } diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index f41dca1e27a..890b937e384 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -2,6 +2,7 @@ #include "Commands.h" #include #include +#include #include #include #include @@ -83,8 +84,11 @@ std::vector KeeperClient::getCompletions(const String & prefix) const void KeeperClient::askConfirmation(const String & prompt, std::function && callback) { + if (!ask_confirmation) + return callback(); + std::cout << prompt << " Continue?\n"; - need_confirmation = true; + waiting_confirmation = true; confirmation_callback = callback; } @@ -131,7 +135,7 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options) .binding("host")); options.addOption( - Poco::Util::Option("port", "p", "server port. default `2181`") + Poco::Util::Option("port", "p", "server port. 
default `9181`") .argument("") .binding("port")); @@ -155,6 +159,11 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options) .argument("") .binding("operation-timeout")); + options.addOption( + Poco::Util::Option("config-file", "c", "if set, will try to get a connection string from clickhouse config. default `config.xml`") + .argument("") + .binding("config-file")); + options.addOption( Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`") .argument("") @@ -164,6 +173,14 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options) Poco::Util::Option("log-level", "", "set log level") .argument("") .binding("log-level")); + + options.addOption( + Poco::Util::Option("no-confirmation", "", "if set, will not require a confirmation on several commands. default false for interactive and true for query") + .binding("no-confirmation")); + + options.addOption( + Poco::Util::Option("tests-mode", "", "run keeper-client in a special mode for tests. all commands output are separated by special symbols. default false") + .binding("tests-mode")); } void KeeperClient::initialize(Poco::Util::Application & /* self */) @@ -176,9 +193,17 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) std::make_shared(), std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), + std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), }); @@ -206,23 +231,18 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) } } - Poco::Logger::root().setLevel(config().getString("log-level", "error")); + String default_log_level; + if (config().has("query")) + /// We don't want to see any information log in query mode, unless it was set explicitly + default_log_level = "error"; + else + default_log_level = "information"; + + Poco::Logger::root().setLevel(config().getString("log-level", default_log_level)); EventNotifier::init(); } -void KeeperClient::executeQuery(const String & query) -{ - std::vector queries; - boost::algorithm::split(queries, query, boost::is_any_of(";")); - - for (const auto & query_text : queries) - { - if (!query_text.empty()) - processQueryText(query_text); - } -} - bool KeeperClient::processQueryText(const String & text) { if (exit_strings.find(text) != exit_strings.end()) @@ -230,29 +250,44 @@ bool KeeperClient::processQueryText(const String & text) try { - if (need_confirmation) + if (waiting_confirmation) { - need_confirmation = false; + waiting_confirmation = false; if (text.size() == 1 && (text == "y" || text == "Y")) confirmation_callback(); return true; } KeeperParser parser; - String message; const char * begin = text.data(); - ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false); + const char * end = begin + text.size(); - if (!res) + while (begin < end) { - std::cerr << message << "\n"; - return true; + String message; + ASTPtr res = tryParseQuery( + parser, + begin, + end, + /* out_error_message = */ message, + /* hilite = */ true, + /* description = */ "", + /* allow_multi_statements = */ true, + /* max_query_size = */ 0, + /* max_parser_depth = */ 0, + /* skip_insignificant = */ false); + + if (!res) + { + std::cerr << message << "\n"; + return true; + } + + auto * query = res->as(); + + auto command = KeeperClient::commands.find(query->command); + 
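Putting the KeeperClient changes above together: the default port is now 9181, connection settings can be picked up from an existing server config, `--query` accepts several `;`-separated commands, and confirmation prompts are skipped in query mode (or with `--no-confirmation`). The config path and node names below are illustrative.

```bash
clickhouse-keeper-client --config-file /etc/clickhouse-server/config.xml \
    --query "ls /; get_stat /clickhouse"
clickhouse-keeper-client --host localhost --port 9181 --no-confirmation --query "rmr /clickhouse/old_node"
```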
command->second->execute(query, this); } - - auto * query = res->as(); - - auto command = KeeperClient::commands.find(query->command); - command->second->execute(query, this); } catch (Coordination::Exception & err) { @@ -261,19 +296,27 @@ bool KeeperClient::processQueryText(const String & text) return true; } -void KeeperClient::runInteractive() +void KeeperClient::runInteractiveReplxx() { LineReader::Patterns query_extenders = {"\\"}; LineReader::Patterns query_delimiters = {}; + char word_break_characters[] = " \t\v\f\a\b\r\n/"; - ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {}); + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters, + /* highlighter_= */ {}); lr.enableBracketedPaste(); while (true) { String prompt; - if (need_confirmation) + if (waiting_confirmation) prompt = "[y/n] "; else prompt = cwd.string() + " :) "; @@ -287,6 +330,26 @@ void KeeperClient::runInteractive() } } +void KeeperClient::runInteractiveInputStream() +{ + for (String input; std::getline(std::cin, input);) + { + if (!processQueryText(input)) + break; + + std::cout << "\a\a\a\a" << std::endl; + std::cerr << std::flush; + } +} + +void KeeperClient::runInteractive() +{ + if (config().hasOption("tests-mode")) + runInteractiveInputStream(); + else + runInteractiveReplxx(); +} + int KeeperClient::main(const std::vector & /* args */) { if (config().hasOption("help")) @@ -298,16 +361,51 @@ int KeeperClient::main(const std::vector & /* args */) return 0; } - auto host = config().getString("host", "localhost"); - auto port = config().getString("port", "2181"); - zk_args.hosts = {host + ":" + port}; + DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); + + /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. + config_processor.registerEmbeddedConfig("config.xml", ""); + auto clickhouse_config = config_processor.loadConfig(); + + Poco::Util::AbstractConfiguration::Keys keys; + clickhouse_config.configuration->keys("zookeeper", keys); + + if (!config().has("host") && !config().has("port") && !keys.empty()) + { + LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection"); + + for (const auto & key : keys) + { + String prefix = "zookeeper." 
+ key; + String host = clickhouse_config.configuration->getString(prefix + ".host"); + String port = clickhouse_config.configuration->getString(prefix + ".port"); + + if (clickhouse_config.configuration->has(prefix + ".secure")) + host = "secure://" + host; + + zk_args.hosts.push_back(host + ":" + port); + } + } + else + { + String host = config().getString("host", "localhost"); + String port = config().getString("port", "9181"); + + zk_args.hosts.push_back(host + ":" + port); + } + zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000; zookeeper = std::make_unique(zk_args); + if (config().has("no-confirmation") || config().has("query")) + ask_confirmation = false; + if (config().has("query")) - executeQuery(config().getString("query")); + { + processQueryText(config().getString("query")); + } else runInteractive(); diff --git a/programs/keeper-client/KeeperClient.h b/programs/keeper-client/KeeperClient.h index e7fa5cdc5fe..0d3db3c2f02 100644 --- a/programs/keeper-client/KeeperClient.h +++ b/programs/keeper-client/KeeperClient.h @@ -49,8 +49,10 @@ public: protected: void runInteractive(); + void runInteractiveReplxx(); + void runInteractiveInputStream(); + bool processQueryText(const String & text); - void executeQuery(const String & query); void loadCommands(std::vector && new_commands); @@ -61,7 +63,8 @@ protected: zkutil::ZooKeeperArgs zk_args; - bool need_confirmation = false; + bool ask_confirmation = true; + bool waiting_confirmation = false; std::vector registered_commands_and_four_letter_words; }; diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index 3420ccb2219..228ce93b78d 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -7,43 +7,34 @@ namespace DB bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result) { - expected.add(pos, getTokenName(TokenType::BareWord)); - - if (pos->type == TokenType::BareWord) + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) { - result = String(pos->begin, pos->end); - ++pos; + if (!parseIdentifierOrStringLiteral(pos, expected, result)) + return false; + ParserToken{TokenType::Whitespace}.ignore(pos); - return true; } - bool status = parseIdentifierOrStringLiteral(pos, expected, result); - ParserToken{TokenType::Whitespace}.ignore(pos); - return status; -} - -bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path) -{ - expected.add(pos, "path"); - - if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) - return parseIdentifierOrStringLiteral(pos, expected, path); - - String result; - while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream) + while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon) { result.append(pos->begin, pos->end); ++pos; } + ParserToken{TokenType::Whitespace}.ignore(pos); if (result.empty()) return false; - path = result; return true; } +bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path) +{ + expected.add(pos, "path"); + return parseKeeperArg(pos, expected, path); +} + bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto query = std::make_shared(); diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt 
index 940e6848597..cdb1d89b18e 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,16 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -if (OS_LINUX) - set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - # for some reason INTERFACE linkage doesn't work for standalone binary - set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -endif () - -clickhouse_embed_binaries( - TARGET clickhouse_keeper_configs - RESOURCES keeper_config.xml keeper_embedded.xml -) - set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp ) @@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) -add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources @@ -71,6 +57,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp @@ -79,6 +66,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp @@ -94,6 +82,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp @@ -144,8 +133,6 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -187,7 +174,7 @@ if (BUILD_STANDALONE_KEEPER) clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_PROGRAM_STANDALONE_BUILD) + target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD) target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND 
TARGET ch_rust::skim) @@ -215,7 +202,6 @@ if (BUILD_STANDALONE_KEEPER) ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) - add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) if (SPLIT_DEBUG_SYMBOLS) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6034d63a016..c0a6a4af07a 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -67,7 +67,7 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD // Weak symbols don't work correctly on Darwin // so we have a stub implementation to avoid linker errors @@ -110,19 +110,18 @@ void Keeper::createServer(const std::string & listen_host, const char * port_nam } catch (const Poco::Exception &) { - std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); - if (listen_try) { - LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", - message); + listen_host, port, getCurrentExceptionMessage(false)); } else { - throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); } } } @@ -150,7 +149,7 @@ int Keeper::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << VERSION_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } @@ -288,13 +287,27 @@ try std::string path; if (config().has("keeper_server.storage_path")) + { path = config().getString("keeper_server.storage_path"); + } else if (config().has("keeper_server.log_storage_path")) + { path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); + } else if (config().has("keeper_server.snapshot_storage_path")) + { path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } else - path = std::filesystem::path{KEEPER_DEFAULT_PATH}; + { + path = KEEPER_DEFAULT_PATH; + } std::filesystem::create_directories(path); @@ -330,6 +343,7 @@ try auto global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); + global_context->setApplicationType(Context::ApplicationType::KEEPER); global_context->setPath(path); global_context->setRemoteHostFilter(config()); @@ -365,7 +379,7 @@ try } /// Initialize keeper RAFT. Do nothing if no keeper_server in config. 
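
The storage-path selection added to Keeper.cpp earlier in this hunk follows a fixed precedence. Below is a minimal standalone sketch of that precedence, assuming a flat key/value map as a stand-in for Poco's AbstractConfiguration; the path constants and names here are placeholders, not the real Keeper API.

```
#include <filesystem>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

/// Stand-ins for the real configuration object and default locations.
using FlatConfig = std::map<std::string, std::string>;
static const std::string DEFAULT_SERVER_PATH = "/var/lib/clickhouse/";
static const std::string KEEPER_DEFAULT_STORAGE = "/var/lib/clickhouse-keeper/";

/// Precedence mirrored from the hunk above: an explicit storage_path wins, then the
/// parent of the log or snapshot path, then an error if an old "coordination"
/// directory already exists under the server path (so existing data is never
/// silently ignored), and only then the Keeper default.
std::string resolveKeeperStoragePath(const FlatConfig & config)
{
    if (auto it = config.find("keeper_server.storage_path"); it != config.end())
        return it->second;

    if (auto it = config.find("keeper_server.log_storage_path"); it != config.end())
        return fs::path(it->second).parent_path().string();

    if (auto it = config.find("keeper_server.snapshot_storage_path"); it != config.end())
        return fs::path(it->second).parent_path().string();

    const std::string server_path = config.count("path") ? config.at("path") : DEFAULT_SERVER_PATH;
    if (fs::is_directory(fs::path(server_path) / "coordination"))
        throw std::runtime_error("A 'coordination' directory already exists; set 'keeper_server.storage_path' explicitly");

    return KEEPER_DEFAULT_STORAGE;
}

int main()
{
    FlatConfig config = {{"keeper_server.log_storage_path", "/var/lib/clickhouse/coordination/logs"}};
    std::cout << resolveKeeperStoragePath(config) << '\n';  // -> /var/lib/clickhouse/coordination
}
```
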
- global_context->initializeKeeperDispatcher(/* start_async = */ true); + global_context->initializeKeeperDispatcher(/* start_async = */ false); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [&] () -> const Poco::Util::AbstractConfiguration & @@ -457,8 +471,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); /// ConfigReloader have to strict parameters which are redundant in our case auto main_config_reloader = std::make_unique( @@ -539,11 +555,13 @@ catch (...) void Keeper::logRevision() const { - Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING} - + "(revision : " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", git hash: " + (git_hash.empty() ? "" : git_hash) - + ", build id: " + (build_id.empty() ? "" : build_id) + ")" - + ", PID " + std::to_string(getpid())); + LOG_INFO(&Poco::Logger::get("Application"), + "Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}", + VERSION_STRING, + ClickHouseRevision::getVersionRevision(), + git_hash.empty() ? "" : git_hash, + build_id.empty() ? "" : build_id, + getpid()); } diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 1cacc391ca5..dd0bf67cb64 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp ) -if (OS_LINUX) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) target_link_libraries(clickhouse-library-bridge PRIVATE diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 3c2a8ae3152..b677a29cb5c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -266,6 +268,10 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); // user's files are everywhere + std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); + global_context->setUserScriptsPath(user_scripts_path); + fs::create_directories(user_scripts_path); + /// top_level_domains_lists const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/"); if (!top_level_domains_path.empty()) @@ -313,7 +319,7 @@ static bool checkIfStdinIsRegularFile() std::string LocalServer::getInitialCreateTableQuery() { - if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || !config().has("query"))) + if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty())) return {}; auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); @@ -455,7 +461,7 @@ try if (first_time) { - if (queries_files.empty() && !config().has("query")) + if (queries_files.empty() && queries.empty()) { 
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl; std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl; @@ -467,7 +473,7 @@ try #else is_interactive = stdin_is_a_tty && (config().hasOption("interactive") - || (!config().has("query") && !config().has("table-structure") && queries_files.empty() && !config().has("table-file"))); + || (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file"))); #endif if (!is_interactive) { @@ -490,6 +496,17 @@ try applyCmdSettings(global_context); + /// try to load user defined executable functions, throw on error and die + try + { + global_context->loadOrReloadUserDefinedExecutableFunctions(config()); + } + catch (...) + { + tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions."); + throw; + } + if (is_interactive) { clearTerminal(); @@ -552,10 +569,10 @@ void LocalServer::updateLoggerLevel(const String & logs_level) void LocalServer::processConfig() { - if (config().has("query") && config().has("queries-file")) + if (!queries.empty() && config().has("queries-file")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time"); - delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file")); + delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file")); if (is_interactive && !delayed_interactive) { if (config().has("multiquery")) @@ -569,7 +586,9 @@ void LocalServer::processConfig() } print_stack_trace = config().getBool("stacktrace", false); - load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false); + const std::string clickhouse_dialect{"clickhouse"}; + load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) + && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; auto logging = (config().has("logger.console") || config().has("logger.level") @@ -638,43 +657,67 @@ void LocalServer::processConfig() /// There is no need for concurrent queries, override max_concurrent_queries. global_context->getProcessList().setMaxSize(0); - /// Size of cache for uncompressed blocks. Zero means disabled. - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", ""); - size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); - if (uncompressed_cache_size) - global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); + const size_t physical_server_memory = getMemoryAmount(); + const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5); + const size_t max_cache_size = static_cast(physical_server_memory * cache_size_to_ram_max_ratio); - /// Size of cache for marks (index of MergeTree family of tables). 
- String mark_cache_policy = config().getString("mark_cache_policy", ""); - size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); - if (mark_cache_size) - global_context->setMarkCache(mark_cache_policy, mark_cache_size); + String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY); + size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); + double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO); + if (uncompressed_cache_size > max_cache_size) + { + uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio); - /// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. - size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0); - if (index_uncompressed_cache_size) - global_context->setIndexUncompressedCache(index_uncompressed_cache_size); + String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY); + size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO); + if (!mark_cache_size) + LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); + if (mark_cache_size > max_cache_size) + { + mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + } + global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio); - /// Size of cache for index marks (index of MergeTree skip indices). - size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); - if (index_mark_cache_size) - global_context->setIndexMarkCache(index_mark_cache_size); + String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY); + size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); + double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO); + if (index_uncompressed_cache_size > max_cache_size) + { + index_uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio); - /// A cache for mmapped files. - size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary. 
- if (mmap_cache_size) - global_context->setMMappedFileCache(mmap_cache_size); + String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY); + size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); + double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO); + if (index_mark_cache_size > max_cache_size) + { + index_mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio); + + size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + if (mmap_cache_size > max_cache_size) + { + mmap_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setMMappedFileCache(mmap_cache_size); + + /// Initialize a dummy query cache. + global_context->setQueryCache(0, 0, 0, 0); #if USE_EMBEDDED_COMPILER - /// 128 MB - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - - constexpr size_t compiled_expression_cache_elements_size_default = 10000; - size_t compiled_expression_cache_elements_size - = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); - - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); + size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); + size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif /// NOTE: it is important to apply any overrides before @@ -838,6 +881,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp config().setBool("no-system-tables", true); if (options.count("only-system-tables")) config().setBool("only-system-tables", true); + if (options.count("database")) + config().setString("default_database", options["database"].as()); if (options.count("input-format")) config().setString("table-data-format", options["input-format"].as()); @@ -905,48 +950,66 @@ int mainEntryClickHouseLocal(int argc, char ** argv) #if defined(FUZZING_MODE) +// linked from programs/main.cpp +bool isClickhouseApp(const std::string & app_suffix, std::vector & argv); + std::optional fuzz_app; extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) { - int & argc = *pargc; - char ** argv = *pargv; + std::vector argv(*pargv, *pargv + (*pargc + 1)); + + if (!isClickhouseApp("local", argv)) + { + std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse local is available." 
<< "\033[0m" << std::endl; + exit(1); + } /// As a user you can add flags to clickhouse binary in fuzzing mode as follows - /// clickhouse -- + /// clickhouse local -- - /// Calculate the position of delimiter "--" that separates arguments - /// of clickhouse-local and libfuzzer - int pos_delim = argc; - for (int i = 0; i < argc; ++i) - { - if (strcmp(argv[i], "--") == 0) + char **p = &(*pargv)[1]; + + auto it = argv.begin() + 1; + for (; *it; ++it) + if (strcmp(*it, "--") == 0) { - pos_delim = i; + ++it; break; } - } + + while (*it) + if (strncmp(*it, "--", 2) != 0) + { + *(p++) = *it; + it = argv.erase(it); + } + else + ++it; + + *pargc = static_cast(p - &(*pargv)[0]); + *p = nullptr; /// Initialize clickhouse-local app fuzz_app.emplace(); - fuzz_app->init(pos_delim, argv); + fuzz_app->init(static_cast(argv.size() - 1), argv.data()); - /// We will leave clickhouse-local specific arguments as is, because libfuzzer will ignore - /// all keys starting with -- return 0; } extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - auto input = String(reinterpret_cast(data), size); - DB::FunctionGetFuzzerData::update(input); - fuzz_app->run(); + try + { + auto input = String(reinterpret_cast(data), size); + DB::FunctionGetFuzzerData::update(input); + fuzz_app->run(); + } + catch (...) + { + } + return 0; } -catch (...) -{ - return 1; -} #endif diff --git a/programs/main.cpp b/programs/main.cpp index 9a3ad47a86e..5857e8d5ee4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -165,26 +165,6 @@ int printHelp(int, char **) std::cerr << "clickhouse " << application.first << " [args] " << std::endl; return -1; } - -bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) -{ - /// Use app if the first arg 'app' is passed (the arg should be quietly removed) - if (argv.size() >= 2) - { - auto first_arg = argv.begin() + 1; - - /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok - if (*first_arg == "--" + app_suffix || *first_arg == app_suffix) - { - argv.erase(first_arg); - return true; - } - } - - /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app - std::string app_name = "clickhouse-" + app_suffix; - return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); -} #endif @@ -351,7 +331,7 @@ struct Checker ; -#if !defined(USE_MUSL) +#if !defined(FUZZING_MODE) && !defined(USE_MUSL) /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. void checkHarmfulEnvironmentVariables(char ** argv) { @@ -407,6 +387,25 @@ void checkHarmfulEnvironmentVariables(char ** argv) } +bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) +{ + /// Use app if the first arg 'app' is passed (the arg should be quietly removed) + if (argv.size() >= 2) + { + auto first_arg = argv.begin() + 1; + + /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok + if (*first_arg == "--" + app_suffix || *first_arg == app_suffix) + { + argv.erase(first_arg); + return true; + } + } + + /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app + std::string app_name = "clickhouse-" + app_suffix; + return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); +} /// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. /// We don't use it. But it can be used by some libraries for implementation of "plugins". 
@@ -466,6 +465,11 @@ int main(int argc_, char ** argv_) checkHarmfulEnvironmentVariables(argv_); #endif + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. std::set_new_handler(nullptr); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 3042ae2bb57..15997ec986e 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -365,17 +365,14 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI hash.update(seed); hash.update(i); + const auto checksum = getSipHash128AsArray(hash); if (size >= 16) { - char * hash_dst = reinterpret_cast(std::min(pos, end - 16)); - hash.get128(hash_dst); + auto * hash_dst = std::min(pos, end - 16); + memcpy(hash_dst, checksum.data(), checksum.size()); } else - { - char value[16]; - hash.get128(value); - memcpy(dst, value, end - dst); - } + memcpy(dst, checksum.data(), end - dst); pos += 16; ++i; @@ -393,7 +390,10 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) { - const UInt128 & src = src_uuid.toUnderType(); + auto src_copy = src_uuid; + transformEndianness(src_copy); + + const UInt128 & src = src_copy.toUnderType(); UInt128 & dst = dst_uuid.toUnderType(); SipHash hash; @@ -401,10 +401,11 @@ static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) hash.update(reinterpret_cast(&src), sizeof(UUID)); /// Saving version and variant from an old UUID - hash.get128(reinterpret_cast(&dst)); + dst = hash.get128(); - dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull); - dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull); + const UInt64 trace[2] = {0x000000000000f000ull, 0xe000000000000000ull}; + UUIDHelpers::getLowBytes(dst_uuid) = (UUIDHelpers::getLowBytes(dst_uuid) & (0xffffffffffffffffull - trace[1])) | (UUIDHelpers::getLowBytes(src_uuid) & trace[1]); + UUIDHelpers::getHighBytes(dst_uuid) = (UUIDHelpers::getHighBytes(dst_uuid) & (0xffffffffffffffffull - trace[0])) | (UUIDHelpers::getHighBytes(src_uuid) & trace[0]); } class FixedStringModel : public IModel @@ -1301,18 +1302,14 @@ try if (structure.empty()) { - ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &) - { - auto file = std::make_unique(STDIN_FILENO); + auto file = std::make_unique(STDIN_FILENO); - /// stdin must be seekable - auto res = lseek(file->getFD(), 0, SEEK_SET); - if (-1 == res) - throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - - return file; - }; + /// stdin must be seekable + auto res = lseek(file->getFD(), 0, SEEK_SET); + if (-1 == res) + throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + SingleReadBufferIterator read_buffer_iterator(std::move(file)); schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const); } else diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 118610e4dcd..56373601b95 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ 
b/programs/odbc-bridge/CMakeLists.txt @@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES validateODBCConnectionString.cpp ) -if (OS_LINUX) - # clickhouse-odbc-bridge is always a separate binary. - # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 855973d10e1..81440b03690 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -1,12 +1,8 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set(CLICKHOUSE_SERVER_SOURCES MetricsTransmitter.cpp Server.cpp ) -set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - set (CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_aggregate_functions @@ -35,8 +31,6 @@ clickhouse_program_add(server) install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) -clickhouse_embed_binaries( - TARGET clickhouse_server_configs - RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js -) -add_dependencies(clickhouse-server-lib clickhouse_server_configs) +if (ENABLE_FUZZING) + add_subdirectory(fuzzers) +endif() diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8c0248580bd..e85ab82c2e1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +130,10 @@ # include #endif +#include +/// A minimal file used when the server is run without installation +INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); + namespace CurrentMetrics { extern const Metric Revision; @@ -321,19 +327,18 @@ void Server::createServer( } catch (const Poco::Exception &) { - std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); - if (listen_try) { - LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", - message); + listen_host, port, getCurrentExceptionMessage(false)); } else { - throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); } } } @@ -385,7 +390,7 @@ int Server::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << VERSION_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; return 0; } return Application::run(); // NOLINT @@ -393,6 +398,7 @@ int Server::run() void Server::initialize(Poco::Util::Application & self) { + ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast(gresource_embedded_xmlData), gresource_embedded_xmlSize)); BaseDaemon::initialize(self); logger().information("starting up"); @@ -445,11 +451,11 @@ void checkForUsersNotInMainConfig( /// Unused in other builds #if defined(OS_LINUX) -static String readString(const String & path) +static String readLine(const String & path) { ReadBufferFromFile in(path); String contents; - readStringUntilEOF(contents, in); + readStringUntilNewlineInto(contents, in); return contents; } @@ -474,9 +480,16 @@ static void sanityChecks(Server & server) #if defined(OS_LINUX) try { + const std::unordered_set fastClockSources = { + // ARM clock + "arch_sys_counter", + // KVM guest clock + "kvm-clock", + // X86 clock + "tsc", + }; const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource"; - String clocksource = readString(filename); - if (clocksource.find("tsc") == std::string::npos && clocksource.find("kvm-clock") == std::string::npos) + if (!fastClockSources.contains(readLine(filename))) server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. Check " + String(filename)); } catch (...) @@ -496,7 +509,7 @@ static void sanityChecks(Server & server) try { const char * filename = "/sys/kernel/mm/transparent_hugepage/enabled"; - if (readString(filename).find("[always]") != std::string::npos) + if (readLine(filename).find("[always]") != std::string::npos) server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\". Check " + String(filename)); } catch (...) @@ -653,10 +666,10 @@ try global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif - const auto memory_amount = getMemoryAmount(); + const size_t physical_server_memory = getMemoryAmount(); LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(physical_server_memory), getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores std::thread::hardware_concurrency()); @@ -742,6 +755,7 @@ try std::lock_guard lock(servers_lock); metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + for (const auto & server : servers_to_start_before_tables) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); @@ -871,6 +885,8 @@ try } } + FailPointInjection::enableFromGlobalConfig(config()); + int default_oom_score = 0; #if !defined(NDEBUG) @@ -1025,36 +1041,6 @@ try fs::create_directories(path / "metadata_dropped/"); } -#if USE_ROCKSDB - /// Initialize merge tree metadata cache - if (config().has("merge_tree_metadata_cache")) - { - fs::create_directories(path / "rocksdb/"); - size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20); - bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false); - try - { - LOG_DEBUG(log, "Initializing MergeTree metadata cache, lru_cache_size: {} continue_if_corrupted: {}", - ReadableSize(size), continue_if_corrupted); - global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); - } - catch (...) 
- { - if (continue_if_corrupted) - { - /// Rename rocksdb directory and reinitialize merge tree metadata cache - time_t now = time(nullptr); - fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now))); - global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); - } - else - { - throw; - } - } - } -#endif - if (config().has("interserver_http_port") && config().has("interserver_https_port")) throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "Both http and https interserver ports are specified"); @@ -1094,6 +1080,75 @@ try if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); + /// Set up caches. + + const size_t max_cache_size = static_cast(physical_server_memory * server_settings.cache_size_to_ram_max_ratio); + + String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; + size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; + double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio; + if (uncompressed_cache_size > max_cache_size) + { + uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio); + + String mark_cache_policy = server_settings.mark_cache_policy; + size_t mark_cache_size = server_settings.mark_cache_size; + double mark_cache_size_ratio = server_settings.mark_cache_size_ratio; + if (mark_cache_size > max_cache_size) + { + mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + } + global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio); + + String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy; + size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; + double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio; + if (index_uncompressed_cache_size > max_cache_size) + { + index_uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio); + + String index_mark_cache_policy = server_settings.index_mark_cache_policy; + size_t index_mark_cache_size = server_settings.index_mark_cache_size; + double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio; + if (index_mark_cache_size > max_cache_size) + { + index_mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio); + + size_t mmap_cache_size = server_settings.mmap_cache_size; + if (mmap_cache_size > max_cache_size) + { + mmap_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + 
global_context->setMMappedFileCache(mmap_cache_size); + + size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE); + size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES); + size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES); + size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS); + if (query_cache_max_size_in_bytes > max_cache_size) + { + query_cache_max_size_in_bytes = max_cache_size; + LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows); + +#if USE_EMBEDDED_COMPILER + size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); + size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); +#endif + /// Initialize main config reloader. std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); @@ -1106,8 +1161,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); auto main_config_reloader = std::make_unique( config_path, @@ -1123,9 +1180,10 @@ try server_settings_.loadSettingsFromConfig(*config); size_t max_server_memory_usage = server_settings_.max_server_memory_usage; - double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio; - size_t default_max_server_memory_usage = static_cast(memory_amount * max_server_memory_usage_to_ram_ratio); + + size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically. 
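
Both the cache sizes set above and max_server_memory_usage computed just below follow the same pattern: a budget is derived from physical RAM and a ratio, an explicit value of 0 falls back to that budget, and an oversized explicit value is clamped. Here is a rough sketch of that pattern with placeholder names; the real code takes the RAM figure from getMemoryAmount(), the ratios from the server settings, and logs through LOG_INFO/LOG_WARNING:

```
#include <cstddef>
#include <cstdio>

/// configured == 0 means "derive from RAM"; anything above the RAM-based budget
/// is clamped, as in the hunks above.
size_t effectiveLimit(size_t configured, size_t physical_ram, double ratio)
{
    const size_t budget = static_cast<size_t>(physical_ram * ratio);

    if (configured == 0)
        return budget;

    if (configured > budget)
    {
        std::fprintf(stderr, "Lowering limit from %zu to %zu because the system has limited RAM\n",
                     configured, budget);
        return budget;
    }

    return configured;
}

int main()
{
    // 8 GiB of RAM, ratio 0.9, 16 GiB requested -> clamped to ~7.2 GiB.
    std::printf("%zu\n", effectiveLimit(size_t{16} << 30, size_t{8} << 30, 0.9));
}
```
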
+ size_t default_max_server_memory_usage = static_cast(current_physical_server_memory * max_server_memory_usage_to_ram_ratio); if (max_server_memory_usage == 0) { @@ -1133,7 +1191,7 @@ try LOG_INFO(log, "Setting max_server_memory_usage was set to {}" " ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(max_server_memory_usage), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), max_server_memory_usage_to_ram_ratio); } else if (max_server_memory_usage > default_max_server_memory_usage) @@ -1144,7 +1202,7 @@ try " calculated as {} available" " * {:.2f} max_server_memory_usage_to_ram_ratio", formatReadableSizeWithBinarySuffix(max_server_memory_usage), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), max_server_memory_usage_to_ram_ratio); } @@ -1154,14 +1212,14 @@ try size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; - size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); + size_t default_merges_mutations_server_memory_usage = static_cast(current_physical_server_memory * server_settings_.merges_mutations_memory_usage_to_ram_ratio); if (merges_mutations_memory_usage_soft_limit == 0) { merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), server_settings_.merges_mutations_memory_usage_to_ram_ratio); } else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) @@ -1170,7 +1228,7 @@ try LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), server_settings_.merges_mutations_memory_usage_to_ram_ratio); } @@ -1310,7 +1368,14 @@ try global_context->updateStorageConfiguration(*config); global_context->updateInterserverCredentials(*config); + + global_context->updateUncompressedCacheConfiguration(*config); + global_context->updateMarkCacheConfiguration(*config); + global_context->updateIndexUncompressedCacheConfiguration(*config); + global_context->updateIndexMarkCacheConfiguration(*config); + global_context->updateMMappedFileCacheConfiguration(*config); global_context->updateQueryCacheConfiguration(*config); + CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); #if USE_SSL CertificateReloader::instance().tryLoad(*config); @@ -1449,77 +1514,52 @@ try access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY); }); + global_context->setStopServersCallback([&](const ServerType & server_type) + { + stopServers(servers, server_type); + }); + + global_context->setStartServersCallback([&](const ServerType & server_type) + { + createServers( + config(), + listen_hosts, + listen_try, + server_pool, + async_metrics, + 
servers, + /* start_servers= */ true, + server_type); + }); + /// Limit on total number of concurrently executed queries. global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); - /// Set up caches. - - size_t max_cache_size = static_cast(memory_amount * server_settings.cache_size_to_ram_max_ratio); - - String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; - LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy); - size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; - if (uncompressed_cache_size > max_cache_size) - { - uncompressed_cache_size = max_cache_size; - LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", - formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); - } - global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); - /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); - const Settings & settings = global_context->getSettingsRef(); /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. global_context->initializeBackgroundExecutorsIfNeeded(); - if (settings.async_insert_threads) + if (server_settings.async_insert_threads) + { global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - settings.async_insert_threads)); - - size_t mark_cache_size = server_settings.mark_cache_size; - String mark_cache_policy = server_settings.mark_cache_policy; - if (!mark_cache_size) - LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); - if (mark_cache_size > max_cache_size) - { - mark_cache_size = max_cache_size; - LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", - formatReadableSizeWithBinarySuffix(mark_cache_size)); + server_settings.async_insert_threads, + server_settings.async_insert_queue_flush_on_shutdown)); } - global_context->setMarkCache(mark_cache_policy, mark_cache_size); - - if (server_settings.index_uncompressed_cache_size) - global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size); - - if (server_settings.index_mark_cache_size) - global_context->setIndexMarkCache(server_settings.index_mark_cache_size); - - if (server_settings.mmap_cache_size) - global_context->setMMappedFileCache(server_settings.mmap_cache_size); - - /// A cache for query results. 
- global_context->setQueryCache(config()); - -#if USE_EMBEDDED_COMPILER - /// 128 MB - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - - constexpr size_t compiled_expression_cache_elements_size_default = 10000; - size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); - - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); -#endif /// Set path for format schema files fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/")); global_context->setFormatSchemaPath(format_schema_path); fs::create_directories(format_schema_path); + /// Set path for filesystem caches + fs::path filesystem_caches_path(config().getString("filesystem_caches_path", "")); + if (!filesystem_caches_path.empty()) + global_context->setFilesystemCachesPath(filesystem_caches_path); + /// Check sanity of MergeTreeSettings on server startup { size_t background_pool_tasks = global_context->getMergeMutateExecutor()->getMaxTasksCount(); @@ -1617,6 +1657,10 @@ try database_catalog.initializeAndLoadTemporaryDatabase(); loadMetadataSystem(global_context); maybeConvertSystemDatabase(global_context); + /// This has to be done before the initialization of system logs, + /// otherwise there is a race condition between the system database initialization + /// and creation of new tables in the database. + startupSystemTables(); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -1635,7 +1679,6 @@ try /// Then, load remaining databases loadMetadata(global_context, default_database); convertDatabasesEnginesIfNeed(global_context); - startupSystemTables(); database_catalog.startupBackgroundCleanup(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); @@ -1658,17 +1701,26 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). 
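
The stop/start-server callbacks registered above and the createServers() changes further below gate every listener on a ServerType, presumably so that SYSTEM START/STOP LISTEN can address a single protocol. A minimal illustrative sketch of that kind of gating; ServerKind here is a simplified stand-in, not the real ServerType class:

```
#include <string>

struct ServerKind
{
    enum class Type { ALL, HTTP, HTTPS, TCP, CUSTOM };

    Type type = Type::ALL;
    std::string custom_name;  /// only meaningful for CUSTOM protocols

    /// ALL starts everything; otherwise the type (and, for CUSTOM, the protocol
    /// name) must match, mirroring the shouldStart() checks in the hunks below.
    bool shouldStart(Type candidate, const std::string & candidate_name = "") const
    {
        if (type == Type::ALL)
            return true;
        if (type != candidate)
            return false;
        return type != Type::CUSTOM || custom_name == candidate_name;
    }
};

/// Usage inside a createServers()-style loop:
///   if (!server_kind.shouldStart(ServerKind::Type::HTTP))
///       continue;                                   // skip the http_port listener
///   if (!server_kind.shouldStart(ServerKind::Type::CUSTOM, protocol_name))
///       continue;                                   // skip that <protocols> entry
int main() { return ServerKind{}.shouldStart(ServerKind::Type::HTTP) ? 0 : 1; }
```
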
- UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); - if (total_memory_profiler_step) + if (server_settings.total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(total_memory_profiler_step); + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); } - double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); - if (total_memory_tracker_sample_probability > 0.0) + if (server_settings.total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); } + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + { + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + } + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + { + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + } #endif @@ -1988,7 +2040,8 @@ void Server::createServers( Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -2002,24 +2055,26 @@ void Server::createServers( for (const auto & protocol : protocols) { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + + std::string prefix = "protocols." + protocol + "."; + std::string port_name = prefix + "port"; + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + std::vector hosts; - if (config.has("protocols." + protocol + ".host")) - hosts.push_back(config.getString("protocols." + protocol + ".host")); + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); else hosts = listen_hosts; for (const auto & host : hosts) { - std::string conf_name = "protocols." 
+ protocol; - std::string prefix = conf_name + "."; - - if (!config.has(prefix + "port")) - continue; - - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; bool is_secure = false; auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); @@ -2048,162 +2103,190 @@ void Server::createServers( for (const auto & listen_host : listen_hosts) { - /// HTTP - const char * port_name = "http_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); + const char * port_name; - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); - }); - - /// HTTPS - port_name = "https_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::HTTP)) { + /// HTTP + port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without 
NetSSL support."); #endif - }); + }); + } - /// TCP - port_name = "tcp_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure 
native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + #if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + #else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + #endif + }); + } - port_name = "mysql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::MYSQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } - port_name = "postgresql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + 
socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } #if USE_GRPC - port_name = "grpc_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::GRPC)) { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); - }); + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); + } #endif - - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); - }); + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + }); + } } } @@ -2214,7 +2297,8 @@ void Server::createInterserverServers( Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -2226,52 +2310,97 @@ void Server::createInterserverServers( /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) { - /// Interserver IO HTTP - const char * port_name = "interserver_http_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - 
socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params)); - }); + const char * port_name; - port_name = "interserver_https_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) { + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + port_name = "interserver_https_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } } } +void Server::stopServers( + std::vector & servers, + const ServerType & server_type +) const +{ + Poco::Logger * log = &logger(); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + 
size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping()) + { + const std::string server_port_name = server.getPortName(); + + if (server_type.shouldStop(server_port_name)) + server.stop(); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, diff --git a/programs/server/Server.h b/programs/server/Server.h index d13378dcd65..3f03dd137ef 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,8 +3,9 @@ #include #include -#include "Server/HTTP/HTTPContext.h" +#include #include +#include #include /** Server provides three interfaces: @@ -106,7 +107,8 @@ private: Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void createInterserverServers( Poco::Util::AbstractConfiguration & config, @@ -115,7 +117,8 @@ private: Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void updateServers( Poco::Util::AbstractConfiguration & config, @@ -123,6 +126,11 @@ private: AsynchronousMetrics & async_metrics, std::vector & servers, std::vector & servers_to_start_before_tables); + + void stopServers( + std::vector & servers, + const ServerType & server_type + ) const; }; } diff --git a/programs/server/config.d/clusters.xml b/programs/server/config.d/clusters.xml new file mode 120000 index 00000000000..97289377736 --- /dev/null +++ b/programs/server/config.d/clusters.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/clusters.xml \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index 2a7dc1e576a..117be72d758 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -317,7 +317,7 @@ 0 - 100 + 1000 10000 + false + /var/lib/clickhouse/ @@ -1026,6 +1028,14 @@ 7500 + + 1048576 + + 8192 + + 524288 + + false @@ -1039,6 +1049,11 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + + false @@ -1084,7 +1111,11 @@ system metric_log
7500 + 1048576 + 8192 + 524288 1000 + false @@ -1151,6 +1198,10 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false + - - int needs explicit cast /// 2. customized types needs explicit cast template - enable_if_not_field_or_bool_or_stringlike_t & /// NOLINT + requires not_field_or_bool_or_stringlike + Field & /// NOLINT operator=(T && rhs); Field & operator= (bool rhs) @@ -449,7 +447,7 @@ public: Types::Which getType() const { return which; } - constexpr std::string_view getTypeName() const { return magic_enum::enum_name(which); } + std::string_view getTypeName() const; bool isNull() const { return which == Types::Null; } template @@ -839,7 +837,7 @@ template <> struct Field::EnumToType { using Type = Dec template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; -template <> struct Field::EnumToType { using Type = DecimalField; }; +template <> struct Field::EnumToType { using Type = AggregateFunctionStateData; }; template <> struct Field::EnumToType { using Type = CustomType; }; template <> struct Field::EnumToType { using Type = UInt64; }; @@ -872,7 +870,7 @@ NearestFieldType> & Field::get() // Disregard signedness when converting between int64 types. constexpr Field::Types::Which target = TypeToEnum::value; if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) + && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which)) && target != Field::Types::IPv4) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Field get from type {} to type {}", which, target); #endif @@ -897,14 +895,16 @@ auto & Field::safeGet() template -Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t) +requires not_field_or_bool_or_stringlike +Field::Field(T && rhs) { auto && val = castToNearestFieldType(std::forward(rhs)); createConcrete(std::forward(val)); } template -Field::enable_if_not_field_or_bool_or_stringlike_t & /// NOLINT +requires not_field_or_bool_or_stringlike +Field & /// NOLINT Field::operator=(T && rhs) { auto && val = castToNearestFieldType(std::forward(rhs)); @@ -1005,8 +1005,7 @@ void writeFieldText(const Field & x, WriteBuffer & buf); String toString(const Field & x); -String fieldTypeToString(Field::Types::Which type); - +std::string_view fieldTypeToString(Field::Types::Which type); } template <> diff --git a/src/Core/MultiEnum.h b/src/Core/MultiEnum.h index 32aae93c6d5..36b69b1b41b 100644 --- a/src/Core/MultiEnum.h +++ b/src/Core/MultiEnum.h @@ -12,9 +12,9 @@ struct MultiEnum MultiEnum() = default; - template ...>>> - constexpr explicit MultiEnum(EnumValues ... v) - : MultiEnum((toBitFlag(v) | ... | 0u)) + template + requires std::conjunction_v...> + constexpr explicit MultiEnum(EnumValues... v) : MultiEnum((toBitFlag(v) | ... | 0u)) {} template diff --git a/src/Core/MySQL/MySQLCharset.cpp b/src/Core/MySQL/MySQLCharset.cpp index 869941ebd84..787e4edcf88 100644 --- a/src/Core/MySQL/MySQLCharset.cpp +++ b/src/Core/MySQL/MySQLCharset.cpp @@ -1,6 +1,5 @@ #include "MySQLCharset.h" #include "config.h" -#include #include #if USE_ICU diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index 5cbc826d0d0..2b46c3d14ad 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -174,8 +174,8 @@ String GTIDSets::toPayload() const for (const auto & set : sets) { // MySQL UUID is big-endian. 
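The replacement below stops poking at the raw UInt128 items of the UUID and instead writes the high and low 64-bit halves explicitly in big-endian order (UUIDHelpers::getHighBytes / getLowBytes), which is what the MySQL GTID payload expects regardless of host byte order. A standalone sketch of that layout (illustrative helper names, not the ClickHouse WriteBuffer API):

    #include <cstdint>
    #include <vector>

    // Append a 64-bit value most-significant byte first, independent of host endianness.
    static void writeBigEndian64(uint64_t value, std::vector<uint8_t> & out)
    {
        for (int shift = 56; shift >= 0; shift -= 8)
            out.push_back(static_cast<uint8_t>(value >> shift));
    }

    // MySQL expects the 16-byte UUID as the high half first, then the low half, both big-endian.
    static std::vector<uint8_t> serializeUuid(uint64_t high, uint64_t low)
    {
        std::vector<uint8_t> payload;
        writeBigEndian64(high, payload);
        writeBigEndian64(low, payload);
        return payload;  // byte 0 is the most significant byte of `high`
    }

The reading side in MySQLReplication.cpp mirrors this: readBinaryBigEndian into getHighBytes first, then getLowBytes.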
- writeBinaryBigEndian(set.uuid.toUnderType().items[0], buffer); - writeBinaryBigEndian(set.uuid.toUnderType().items[1], buffer); + writeBinaryBigEndian(UUIDHelpers::getHighBytes(set.uuid), buffer); + writeBinaryBigEndian(UUIDHelpers::getLowBytes(set.uuid), buffer); UInt64 intervals_size = set.intervals.size(); buffer.write(reinterpret_cast(&intervals_size), 8); diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index dcb407daa90..3042ae44a3d 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -940,13 +940,8 @@ namespace MySQLReplication payload.readStrict(reinterpret_cast(&commit_flag), 1); // MySQL UUID is big-endian. - UInt64 high = 0UL; - UInt64 low = 0UL; - readBigEndianStrict(payload, reinterpret_cast(&low), 8); - gtid.uuid.toUnderType().items[0] = low; - - readBigEndianStrict(payload, reinterpret_cast(&high), 8); - gtid.uuid.toUnderType().items[1] = high; + readBinaryBigEndian(UUIDHelpers::getHighBytes(gtid.uuid), payload); + readBinaryBigEndian(UUIDHelpers::getLowBytes(gtid.uuid), payload); payload.readStrict(reinterpret_cast(>id.seq_no), 8); diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index e4287e8769b..1584dbd42ac 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -33,8 +33,10 @@ namespace MySQLReplication inline void readBigEndianStrict(ReadBuffer & payload, char * to, size_t n) { payload.readStrict(to, n); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ char *start = to, *end = to + n; std::reverse(start, end); +#endif } inline void readTimeFractionalPart(ReadBuffer & payload, UInt32 & factional, UInt16 meta) diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index 96cc19babea..efc10b6ed20 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -47,6 +47,8 @@ public: void tryUpdateConnection(); + bool isConnected() const { return connection != nullptr && connection->is_open(); } + const ConnectionInfo & getConnectionInfo() { return connection_info; } String getInfoForLog() const { return connection_info.host_port; } diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 16803c823ba..ad311f9cc2f 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -28,10 +28,25 @@ public: ConnectionHolder(const ConnectionHolder & other) = delete; + void setBroken() { is_broken = true; } + ~ConnectionHolder() { if (auto_close) + { connection.reset(); + } + else if (is_broken) + { + try + { + connection->getRef().reset(); + } + catch (...) 
+ { + connection.reset(); + } + } pool->returnObject(std::move(connection)); } @@ -49,6 +64,7 @@ private: PoolPtr pool; ConnectionPtr connection; bool auto_close; + bool is_broken = false; }; using ConnectionHolderPtr = std::unique_ptr; diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index 8c0654b559f..b0d7646a5f7 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -805,20 +805,9 @@ protected: const String & user_name, const String & password, Session & session, - Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - try - { - session.authenticate(user_name, password, address); - } - catch (const Exception &) - { - mt.send( - Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), - true); - throw; - } + session.authenticate(user_name, password, address); } public: @@ -839,10 +828,10 @@ public: void authenticate( const String & user_name, Session & session, - Messaging::MessageTransport & mt, + [[maybe_unused]] Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) override { - return setPassword(user_name, "", session, mt, address); + return setPassword(user_name, "", session, address); } AuthenticationType getType() const override @@ -866,7 +855,7 @@ public: if (type == Messaging::FrontMessageType::PASSWORD_MESSAGE) { std::unique_ptr password = mt.receive(); - return setPassword(user_name, password->password, session, mt, address); + return setPassword(user_name, password->password, session, address); } else throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, @@ -901,20 +890,30 @@ public: Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - const AuthenticationType user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); - if (type_to_method.find(user_auth_type) != type_to_method.end()) + AuthenticationType user_auth_type; + try { - type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); - mt.send(Messaging::AuthenticationOk(), true); - LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); - return; + user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); + if (type_to_method.find(user_auth_type) != type_to_method.end()) + { + type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); + mt.send(Messaging::AuthenticationOk(), true); + LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); + return; + } + } + catch (const Exception&) + { + mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), + true); + + throw; } - mt.send( - Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), - true); + mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), + true); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication type {} is not supported.", user_auth_type); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication method is not supported: {}", user_auth_type); } }; } diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 97a2831ffe8..7f2b3cfa26a 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -81,7 +81,7 @@ namespace Protocol /// This is such an inverted logic, where server sends requests /// And client returns back response ProfileEvents = 14, /// 
Packet with profile events from server. - MergeTreeAllRangesAnnounecement = 15, + MergeTreeAllRangesAnnouncement = 15, MergeTreeReadTaskRequest = 16, /// Request from a MergeTree replica to a coordinator TimezoneUpdate = 17, /// Receive server's (session-wide) default timezone MAX = TimezoneUpdate, @@ -110,7 +110,7 @@ namespace Protocol "PartUUIDs", "ReadTaskRequest", "ProfileEvents", - "MergeTreeAllRangesAnnounecement", + "MergeTreeAllRangesAnnouncement", "MergeTreeReadTaskRequest", "TimezoneUpdate", }; diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index dbe13d9502a..9897f314aa8 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -46,15 +46,6 @@ #define DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION 54454 -/// Version of ClickHouse TCP protocol. -/// -/// Should be incremented manually on protocol changes. -/// -/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, -/// later is just a number for server version (one number instead of commit SHA) -/// for simplicity (sometimes it may be more convenient in some use cases). -#define DBMS_TCP_PROTOCOL_VERSION 54464 - #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 #define DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT 54456 @@ -77,3 +68,14 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_TOTAL_BYTES_IN_PROGRESS 54463 #define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54464 + +#define DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION 54465 + +/// Version of ClickHouse TCP protocol. +/// +/// Should be incremented manually on protocol changes. +/// +/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, +/// later is just a number for server version (one number instead of commit SHA) +/// for simplicity (sometimes it may be more convenient in some use cases). 
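Below, DBMS_TCP_PROTOCOL_VERSION is moved underneath the feature-revision defines and bumped to 54465, alongside the new DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION. A rough sketch of how such constants are typically consumed, i.e. take the lower of the two peers' revisions and gate features on it (illustrative code under that assumption, not the actual handshake in TCPHandler):

    #include <algorithm>
    #include <cstdint>

    constexpr uint64_t SERVER_TCP_PROTOCOL_VERSION = 54465;             // mirrors DBMS_TCP_PROTOCOL_VERSION
    constexpr uint64_t MIN_REVISION_WITH_SPARSE_SERIALIZATION = 54465;  // mirrors the new define

    // Both peers advertise a revision; the effective one is the minimum of the two.
    inline uint64_t negotiateRevision(uint64_t client_revision)
    {
        return std::min<uint64_t>(SERVER_TCP_PROTOCOL_VERSION, client_revision);
    }

    // A feature introduced at revision N is used only when the negotiated revision is >= N.
    inline bool canUseSparseSerialization(uint64_t negotiated_revision)
    {
        return negotiated_revision >= MIN_REVISION_WITH_SPARSE_SERIALIZATION;
    }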
+#define DBMS_TCP_PROTOCOL_VERSION 54465 diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index 2b4ee6485bc..fbf86d3e9ad 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -25,7 +25,7 @@ void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfigurat "max_remote_write_network_bandwidth_for_server", }; - for (auto setting : all()) + for (const auto & setting : all()) { const auto & name = setting.getName(); if (config.has(name)) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1a9f226041b..7e346f3596c 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -2,6 +2,7 @@ #include +#include namespace Poco::Util @@ -38,29 +39,38 @@ namespace DB M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ M(Int32, max_connections, 1024, "Max server connections.", 0) \ M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ - M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating asynchronous metrics.", 0) \ + M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ M(String, default_database, "default", "Default database name.", 0) \ M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \ M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \ M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ - M(UInt64, max_server_memory_usage, 0, "Limit on total memory usage. Zero means Unlimited.", 0) \ - M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to ram ratio. Allows to lower max memory on low-memory systems.", 0) \ - M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Limit on total memory usage for merges and mutations. Zero means Unlimited.", 0) \ - M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to ram ratio. Allows to lower memory limit on low-memory systems.", 0) \ + M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \ + M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \ + M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \ + M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \ + M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \ M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ + M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. 
Zero means asynchronous mode is disabled", 0) \ + M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ \ - M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \ - M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. Zero means Unlimited.", 0) \ - M(UInt64, max_concurrent_select_queries, 0, "Limit on total number of concurrently select queries. Zero means Unlimited.", 0) \ + M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \ + M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \ + M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \ \ - M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro ram max ratio. Allows to lower cache size on low-memory systems.", 0) \ - M(String, uncompressed_cache_policy, "SLRU", "Uncompressed cache policy name.", 0) \ - M(UInt64, uncompressed_cache_size, 0, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \ - M(UInt64, mark_cache_size, 5368709120, "Size of cache for marks (index of MergeTree family of tables).", 0) \ - M(String, mark_cache_policy, "SLRU", "Mark cache policy name.", 0) \ - M(UInt64, index_uncompressed_cache_size, 0, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \ - M(UInt64, index_mark_cache_size, 0, "Size of cache for index marks. Zero means disabled.", 0) \ - M(UInt64, mmap_cache_size, 1000, "A cache for mmapped files.", 0) /* The choice of default is arbitrary. */ \ + M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ + M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \ + M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \ + M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \ + M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \ + M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \ + M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \ + M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Index uncompressed cache policy name.", 0) \ + M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \ + M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the index uncompressed cache relative to the cache's total size.", 0) \ + M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Index mark cache policy name.", 0) \ + M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for index marks. 
Zero means disabled.", 0) \ + M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the index mark cache relative to the cache's total size.", 0) \ + M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ @@ -68,7 +78,7 @@ namespace DB \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ - M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means Unlimited.", 0) \ + M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \ @@ -81,8 +91,13 @@ namespace DB M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) - + M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ + \ + M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ + M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. 
You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 1e8db920745..ed0868eaa4d 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -87,7 +87,7 @@ void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfigura return; Settings settings; - for (auto setting : settings.all()) + for (const auto & setting : settings.all()) { const auto & name = setting.getName(); if (config.has(name) && !setting.isObsolete()) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 24be644ee55..c5633856ade 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -45,7 +45,9 @@ class IColumn; M(UInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \ M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \ M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \ + M(UInt64, max_threads_for_indexes, 0, "The maximum number of threads process indices.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ + M(Bool, use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \ M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. 
for URL engine) per each thread.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ @@ -78,7 +80,7 @@ class IColumn; M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ - M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \ + M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. You ", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ @@ -104,6 +106,7 @@ class IColumn; M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ + M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ @@ -259,7 +262,7 @@ class IColumn; \ M(Float, memory_tracker_fault_probability, 0., "For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.", 0) \ \ - M(Bool, enable_http_compression, false, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \ + M(Bool, enable_http_compression, false, "Compress the result if the client over HTTP said that it understands data compressed by gzip, deflate, zstd, br, lz4, bz2, xz.", 0) \ M(Int64, http_zlib_compression_level, 3, "Compression level - used if the client on HTTP said that it understands data compressed by gzip or deflate.", 0) \ \ M(Bool, http_native_compression_disable_checksumming_on_decompress, false, "If you uncompress the POST data from the client compressed by the native format, do not check the checksum.", 0) \ @@ -268,7 +271,7 @@ class IColumn; \ M(Bool, add_http_cors_header, false, "Write add http CORS header.", 0) \ \ - M(UInt64, max_http_get_redirects, 0, "Max number of http GET redirects hops allowed. 
Make sure additional security measures are in place to prevent a malicious server to redirect your requests to unexpected services.", 0) \ + M(UInt64, max_http_get_redirects, 0, "Max number of http GET redirects hops allowed. Ensures additional security measures are in place to prevent a malicious server to redirect your requests to unexpected services.\n\nIt is the case when an external server redirects to another address, but that address appears to be internal to the company's infrastructure, and by sending an HTTP request to an internal server, you could request an internal API from the internal network, bypassing the auth, or even query other services, such as Redis or Memcached. When you don't have an internal infrastructure (including something running on your localhost), or you trust the server, it is safe to allow redirects. Although keep in mind, that if the URL uses HTTP instead of HTTPS, and you will have to trust not only the remote server but also your ISP and every network in the middle.", 0) \ \ M(Bool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", 0) \ \ @@ -305,6 +308,12 @@ class IColumn; M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \ \ M(Bool, partial_result_on_first_cancel, false, "Allows query to return a partial result after cancel.", 0) \ + \ + M(Milliseconds, partial_result_update_duration_ms, 0, "Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.", 0) \ + M(UInt64, max_rows_in_partial_result, 10, "Maximum rows to show in the partial result after every real-time update while the query runs (use partial result limit + OFFSET as a value in case of OFFSET in the query).", 0) \ + \ + M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ + M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ @@ -386,6 +395,8 @@ class IColumn; M(UInt64, max_temporary_columns, 0, "If a query generates more than the specified number of temporary columns in memory as a result of intermediate calculation, exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries.", 0) \ M(UInt64, max_temporary_non_const_columns, 0, "Similar to the 'max_temporary_columns' setting but applies only to non-constant columns. This makes sense, because constant columns are cheap and it is reasonable to allow more of them.", 0) \ \ + M(UInt64, max_sessions_for_user, 0, "Maximum number of simultaneous sessions for a user.", 0) \ + \ M(UInt64, max_subquery_depth, 100, "If a query has more than specified number of nested subqueries, throw an exception. 
This allows you to have a sanity check to protect the users of your cluster from going insane with their queries.", 0) \ M(UInt64, max_analyze_depth, 5000, "Maximum number of analyses performed by interpreter.", 0) \ M(UInt64, max_ast_depth, 1000, "Maximum depth of query syntax tree. Checked after parsing.", 0) \ @@ -427,7 +438,9 @@ class IColumn; M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ - M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \ @@ -495,6 +508,7 @@ class IColumn; M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' produces the month name instead of minutes.", 0) \ \ M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. 
Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \ + M(Bool, throw_on_max_partitions_per_insert_block, true, "Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block.", 0) \ M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \ M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \ M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries", 0) \ @@ -525,16 +539,18 @@ class IColumn; M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ + M(Bool, optimize_count_from_files, true, "Optimize counting rows from files in supported input formats", 0) \ + M(Bool, use_cache_for_count_from_files, true, "Use cache to count the number of rows in files", 0) \ M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ + M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \ M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. 
This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ - M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ @@ -585,6 +601,9 @@ class IColumn; M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \ M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \ + M(Bool, mutations_execute_nondeterministic_on_initiator, false, "If true, nondeterministic functions are executed on the initiator and replaced with literals in UPDATE and DELETE queries", 0) \ + M(Bool, mutations_execute_subqueries_on_initiator, false, "If true, scalar subqueries are executed on the initiator and replaced with literals in UPDATE and DELETE queries", 0) \ + M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of a serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ \ M(Bool, use_query_cache, false, "Enable the query cache", 0) \ M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \ @@ -623,6 +642,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ + M(Bool, enable_url_encoding, true, "Allows to enable/disable decoding/encoding of the path in the URI for the URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to process previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync).
Not recommended to enable these settings.", 0) \ @@ -630,7 +650,7 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ - M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \ + M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw'", 0) \ M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ @@ -659,7 +679,8 @@ class IColumn; M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ - M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. 
The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ @@ -673,8 +694,8 @@ class IColumn; M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ + M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \ \ - M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ @@ -694,15 +715,15 @@ class IColumn; \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ - M(Bool, allow_prefetched_read_pool_for_remote_filesystem, false, "Prefer prefethed threadpool if all parts are on remote filesystem", 0) \ + M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \ M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, "Prefer prefetched threadpool if all parts are on local filesystem", 0) \ \ M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \ M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \ M(UInt64, filesystem_prefetch_step_marks, 0, "Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \ M(UInt64, filesystem_prefetch_min_bytes_for_single_read_task, "8Mi", "Do not parallelize within one file read less than this amount of bytes. E.g. one reader will not receive a read task of size less than this amount. This setting is recommended to avoid spikes of time for aws getObject requests to aws", 0) \ - M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", "Maximum memory usage for prefetches. Zero means unlimited", 0) \ - M(UInt64, filesystem_prefetches_limit, 0, "Maximum number of prefetches. Zero means unlimited.
A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \ + M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", "Maximum memory usage for prefetches.", 0) \ + M(UInt64, filesystem_prefetches_limit, 200, "Maximum number of prefetches. Zero means unlimited. The setting `filesystem_prefetch_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \ \ M(UInt64, use_structure_from_insertion_table_in_table_functions, 2, "Use structure from insertion table instead of schema inference from data. Possible values: 0 - disabled, 1 - enabled, 2 - auto", 0) \ \ @@ -757,6 +778,7 @@ class IColumn; M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \ M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \ M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \ + M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants (except uniqUpTo) to count if the subquery has a DISTINCT or GROUP BY clause.", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ @@ -764,6 +786,8 @@ class IColumn; M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_s3queue, false, "Allows to use S3Queue engine. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ @@ -777,6 +801,7 @@ class IColumn; M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number of pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ + M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
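The two sampling thresholds introduced above (`memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`) act as size filters applied before the `memory_profiler_sample_probability` coin flip decides whether an allocation is written to `system.trace_log`. The following minimal C++ sketch illustrates that gating logic only; it is not the actual MemoryTracker code, and the `SamplingSettings` struct and `shouldSampleAllocation` helper are assumed names used for illustration.

```cpp
#include <cstdint>
#include <random>

/// Illustrative stand-in for the three settings involved (an assumed struct, not ClickHouse's).
struct SamplingSettings
{
    double sample_probability = 0.0;   /// memory_profiler_sample_probability
    uint64_t min_allocation_size = 0;  /// memory_profiler_sample_min_allocation_size, 0 disables the lower bound
    uint64_t max_allocation_size = 0;  /// memory_profiler_sample_max_allocation_size, 0 disables the upper bound
};

/// Returns true if this alloc/free should be recorded as a 'MemorySample' trace.
bool shouldSampleAllocation(const SamplingSettings & s, uint64_t size, std::mt19937_64 & rng)
{
    if (s.sample_probability <= 0.0)
        return false;  /// sampling disabled
    if (s.min_allocation_size != 0 && size < s.min_allocation_size)
        return false;  /// below the lower threshold
    if (s.max_allocation_size != 0 && size > s.max_allocation_size)
        return false;  /// above the upper threshold
    return std::uniform_real_distribution<double>(0.0, 1.0)(rng) < s.sample_probability;
}
```

With both thresholds left at their default of 0, every allocation stays eligible, matching the setting descriptions above; as those descriptions also note, sampling still only happens once untracked memory exceeds `max_untracked_memory`.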
@@ -807,7 +832,6 @@ class IColumn; MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_parts_interval_seconds, 1) \ MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \ MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \ - MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \ /* moved to config.xml: see also src/Core/ServerSettings.h */ \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_pool_size, 16) \ @@ -820,6 +844,7 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \ + MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \ /* ---- */ \ @@ -831,6 +856,10 @@ class IColumn; MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \ MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \ MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \ + MAKE_OBSOLETE(M, Bool, input_format_arrow_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, input_format_parquet_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \ /** The section above is for obsolete settings. Do not add anything there. */ @@ -851,17 +880,17 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ - M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ - M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ - M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. 
Usually makes it much slower.", 0) \ + M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ + M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \ + M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \ M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \ M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \ @@ -876,6 +905,10 @@ class IColumn; M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \ + M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ + M(Bool, input_format_tsv_allow_variable_number_of_columns, false, "Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values", 0) \ + M(Bool, input_format_custom_allow_variable_number_of_columns, false, "Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values", 0) \ + M(Bool, input_format_json_compact_allow_variable_number_of_columns, false, "Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ @@ -898,6 +931,7 @@ class IColumn; M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ + M(Bool, output_format_markdown_escape_special_characters, false, 
"Escape special characters in Markdown", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \ M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \ @@ -951,6 +985,10 @@ class IColumn; M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \ + M(Bool, output_format_parquet_use_custom_encoder, false, "Use a faster Parquet encoder implementation.", 0) \ + M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \ + M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ + M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -1000,6 +1038,10 @@ class IColumn; \ M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ + M(Bool, format_capn_proto_use_autogenerated_schema, true, "Use autogenerated CapnProto schema when format_schema is not set", 0) \ + M(Bool, format_protobuf_use_autogenerated_schema, true, "Use autogenerated Protobuf when format_schema is not set", 0) \ + M(String, output_format_schema, "", "The path to the file where the automatically generated schema will be saved", 0) \ + \ M(String, input_format_mysql_dump_table_name, "", "Name of the table in MySQL dump from which to read data", 0) \ M(Bool, input_format_mysql_dump_map_column_names, true, "Match columns from table in MySQL dump and columns from ClickHouse table by names", 0) \ \ @@ -1014,9 +1056,11 @@ class IColumn; \ M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \ M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ + M(Bool, regexp_dict_flag_case_insensitive, false, "Use case-insensitive matching for a regexp_tree dictionary. 
Can be overridden in individual expressions with (?i) and (?-i).", 0) \ + M(Bool, regexp_dict_flag_dotall, false, "Allow '.' to match newline characters for a regexp_tree dictionary.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ + M(Bool, precise_float_parsing, false, "Prefer a more precise (but slower) float parsing algorithm", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. @@ -1032,7 +1076,7 @@ DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) /** Settings of query execution. * These settings go to users.xml. */ -struct Settings : public BaseSettings, public IHints<2, Settings> +struct Settings : public BaseSettings, public IHints<2> { Settings() = default; diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 2886cdd288d..77a8476ffc8 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.8", {{"output_format_markdown_escape_special_characters", false, true, "Escape special characters in Markdown"}, {"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 86400954e2f..a30d8040f47 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -138,7 +138,6 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, - {"kusto", Dialect::kusto}, {"kusto", Dialect::kusto}, {"prql", Dialect::prql}}) // FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?
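The `settings_changes_history` entries added above are what the `compatibility` setting consults: for every release newer than the requested compatibility version, the recorded previous default of each listed setting is restored. Below is a deliberately simplified, self-contained C++ sketch of that idea, not ClickHouse's actual implementation; the `SettingChange` struct and `applyCompatibility` function are assumed names, and real version keys are not plain strings.

```cpp
#include <map>
#include <string>
#include <vector>

/// One recorded change: a setting whose default moved from previous_value to new_value in `version`.
struct SettingChange
{
    std::string name;
    std::string previous_value;
    std::string new_value;
};

/// Walk every release newer than the requested compatibility version and restore
/// the defaults that were in effect before it. (String comparison of versions is a
/// simplification that only works for keys like "23.5" ... "23.8".)
void applyCompatibility(
    const std::map<std::string, std::vector<SettingChange>> & history,
    const std::string & compatibility_version,
    std::map<std::string, std::string> & effective_defaults)
{
    for (const auto & [version, changes] : history)
        if (version > compatibility_version)
            for (const auto & change : changes)
                effective_defaults[change.name] = change.previous_value;
}
```

For example, with `compatibility` set to "23.7", the 23.8 entry above would restore `output_format_markdown_escape_special_characters` to its previous default of false.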
@@ -175,4 +174,19 @@ IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS, {"zlib", FormatSettings::ORCCompression::ZLIB}, {"lz4", FormatSettings::ORCCompression::LZ4}}) +IMPLEMENT_SETTING_ENUM(S3QueueMode, ErrorCodes::BAD_ARGUMENTS, + {{"ordered", S3QueueMode::ORDERED}, + {"unordered", S3QueueMode::UNORDERED}}) + +IMPLEMENT_SETTING_ENUM(S3QueueAction, ErrorCodes::BAD_ARGUMENTS, + {{"keep", S3QueueAction::KEEP}, + {"delete", S3QueueAction::DELETE}}) + +IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS, + {{"none", ExternalCommandStderrReaction::NONE}, + {"log", ExternalCommandStderrReaction::LOG}, + {"log_first", ExternalCommandStderrReaction::LOG_FIRST}, + {"log_last", ExternalCommandStderrReaction::LOG_LAST}, + {"throw", ExternalCommandStderrReaction::THROW}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index c61afbd2bbf..034e4c8c887 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -221,4 +222,23 @@ enum class ParallelReplicasCustomKeyFilterType : uint8_t DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) DECLARE_SETTING_ENUM(LocalFSReadMethod) + +enum class S3QueueMode +{ + ORDERED, + UNORDERED, +}; + +DECLARE_SETTING_ENUM(S3QueueMode) + +enum class S3QueueAction +{ + KEEP, + DELETE, +}; + +DECLARE_SETTING_ENUM(S3QueueAction) + +DECLARE_SETTING_ENUM(ExternalCommandStderrReaction) + } diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 9b8677a9888..80197cfbe22 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -337,7 +337,7 @@ void SettingFieldString::readBinary(ReadBuffer & in) /// that. The linker does not complain only because clickhouse-keeper does not call any of below /// functions. A cleaner alternative would be more modular libraries, e.g. one for data types, which /// could then be linked by the server and the linker. -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD SettingFieldMap::SettingFieldMap(const Field & f) : value(fieldToMap(f)) {} diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 32fffd3af06..99f280d3641 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -10,6 +10,8 @@ #include #include #include +#include + namespace DB { @@ -245,7 +247,7 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; -#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD #define NORETURN [[noreturn]] #else #define NORETURN @@ -426,9 +428,8 @@ constexpr auto getEnumValues() auto it = map.find(value); \ if (it != map.end()) \ return it->second; \ - throw Exception::createDeprecated( \ - "Unexpected value of " #NEW_NAME ":" + std::to_string(std::underlying_type::type(value)), \ - ERROR_CODE_FOR_UNEXPECTED_NAME); \ + throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, \ + "Unexpected value of " #NEW_NAME ":{}", std::to_string(std::underlying_type::type(value))); \ } \ \ typename SettingField##NEW_NAME::EnumType SettingField##NEW_NAME##Traits::fromString(std::string_view str) \ @@ -442,7 +443,7 @@ constexpr auto getEnumValues() auto it = map.find(str); \ if (it != map.end()) \ return it->second; \ - String msg = "Unexpected value of " #NEW_NAME ": '" + String{str} + "'. 
Must be one of ["; \ + String msg; \ bool need_comma = false; \ for (auto & name : map | boost::adaptors::map_keys) \ { \ @@ -450,8 +451,7 @@ constexpr auto getEnumValues() msg += ", "; \ msg += "'" + String{name} + "'"; \ } \ - msg += "]"; \ - throw Exception::createDeprecated(msg, ERROR_CODE_FOR_UNEXPECTED_NAME); \ + throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, "Unexpected value of " #NEW_NAME ": '{}'. Must be one of [{}]", String{str}, msg); \ } // Mostly like SettingFieldEnum, but can have multiple enum values (or none) set at once. diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 37a0f2db3e2..1a79c23d955 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -72,12 +72,6 @@ void applySettingsQuirks(Settings & settings, Poco::Logger * log) } } -#if defined(THREAD_SANITIZER) - settings.use_hedged_requests.value = false; - if (log) - LOG_WARNING(log, "use_hedged_requests has been disabled for the build with Thread Sanitizer, because they are using fibers, leading to a failed assertion inside TSan"); -#endif - if (!queryProfilerWorks()) { if (settings.query_profiler_real_time_period_ns) diff --git a/src/Core/SortDescription.cpp b/src/Core/SortDescription.cpp index ae6aedf202d..9ba7df8ef24 100644 --- a/src/Core/SortDescription.cpp +++ b/src/Core/SortDescription.cpp @@ -133,8 +133,7 @@ void compileSortDescriptionIfNeeded(SortDescription & description, const DataTyp SipHash sort_description_dump_hash; sort_description_dump_hash.update(description_dump); - UInt128 sort_description_hash_key; - sort_description_dump_hash.get128(sort_description_hash_key); + const auto sort_description_hash_key = sort_description_dump_hash.get128(); { std::lock_guard lock(mutex); diff --git a/src/Core/UUID.cpp b/src/Core/UUID.cpp index ef1e10f5063..10350964f50 100644 --- a/src/Core/UUID.cpp +++ b/src/Core/UUID.cpp @@ -9,10 +9,11 @@ namespace UUIDHelpers { UUID generateV4() { - UInt128 res{thread_local_rng(), thread_local_rng()}; - res.items[0] = (res.items[0] & 0xffffffffffff0fffull) | 0x0000000000004000ull; - res.items[1] = (res.items[1] & 0x3fffffffffffffffull) | 0x8000000000000000ull; - return UUID{res}; + UUID uuid; + getHighBytes(uuid) = (thread_local_rng() & 0xffffffffffff0fffull) | 0x0000000000004000ull; + getLowBytes(uuid) = (thread_local_rng() & 0x3fffffffffffffffull) | 0x8000000000000000ull; + + return uuid; } } diff --git a/src/Core/UUID.h b/src/Core/UUID.h index a24dcebdc9e..2bdefe9d3fc 100644 --- a/src/Core/UUID.h +++ b/src/Core/UUID.h @@ -2,6 +2,59 @@ #include +/** + * Implementation Details + * ^^^^^^^^^^^^^^^^^^^^^^ + * The underlying implementation for a UUID has it represented as a 128-bit unsigned integer. Underlying this, a wide + * integer with a 64-bit unsigned integer as its base is utilized. This wide integer can be interfaced with as an array + * to access different components of the base. For example, on a Little Endian platform, accessing at index 0 will give + * you the 8 higher bytes, and index 1 will give you the 8 lower bytes. On a Big Endian platform, this is reversed where + * index 0 will give you the 8 lower bytes, and index 1 will give you the 8 higher bytes. + * + * uuid.toUnderType().items[0] + * + * // uint64_t uint64_t + * // [xxxxxxxx] [ ] + * + * uuid.toUnderType().items[1] + * + * // uint64_t uint64_t + * // [ ] [xxxxxxxx] + * + * The way that data is stored in the underlying wide integer treats the data as two 64-bit chunks sequenced in the + * array. 
On a Little Endian platform, this results in the following layout + * + * // Suppose uuid contains 61f0c404-5cb3-11e7-907b-a6006ad3dba0 + * + * uuid.toUnderType().items[0] + * + * // uint64_t as HEX + * // [E7 11 B3 5C 04 C4 F0 61] [A0 DB D3 6A 00 A6 7B 90] + * // ^^^^^^^^^^^^^^^^^^^^^^^ + * + * uuid.toUnderType().items[1] + * + * // uint64_t as HEX + * // [E7 11 B3 5C 04 C4 F0 61] [A0 DB D3 6A 00 A6 7B 90] + * // ^^^^^^^^^^^^^^^^^^^^^^^ + * + * while on a Big Endian platform this would be + * + * // Suppose uuid contains 61f0c404-5cb3-11e7-907b-a6006ad3dba0 + * + * uuid.toUnderType().items[0] + * + * // uint64_t as HEX + * // [90 7B A6 00 6A D3 DB A0] [61 F0 C4 04 5C B3 11 E7] + * // ^^^^^^^^^^^^^^^^^^^^^^^ + * + * uuid.toUnderType().items[1] + * + * // uint64_t as HEX + * // [90 7B A6 00 6A D3 DB A0] [61 F0 C4 04 5C B3 11 E7] + * // ^^^^^^^^^^^^^^^^^^^^^^^ +*/ + namespace DB { @@ -11,6 +64,29 @@ namespace UUIDHelpers /// Generate random UUID. UUID generateV4(); + constexpr size_t HighBytes = (std::endian::native == std::endian::little) ? 0 : 1; + constexpr size_t LowBytes = (std::endian::native == std::endian::little) ? 1 : 0; + + inline uint64_t getHighBytes(const UUID & uuid) + { + return uuid.toUnderType().items[HighBytes]; + } + + inline uint64_t & getHighBytes(UUID & uuid) + { + return uuid.toUnderType().items[HighBytes]; + } + + inline uint64_t getLowBytes(const UUID & uuid) + { + return uuid.toUnderType().items[LowBytes]; + } + + inline uint64_t & getLowBytes(UUID & uuid) + { + return uuid.toUnderType().items[LowBytes]; + } + const UUID Nil{}; } diff --git a/src/Core/examples/CMakeLists.txt b/src/Core/examples/CMakeLists.txt index 868173e0e31..2326eada96d 100644 --- a/src/Core/examples/CMakeLists.txt +++ b/src/Core/examples/CMakeLists.txt @@ -9,6 +9,3 @@ target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) clickhouse_add_executable (mysql_protocol mysql_protocol.cpp) target_link_libraries (mysql_protocol PRIVATE dbms) - -clickhouse_add_executable (coro coro.cpp) -target_link_libraries (coro PRIVATE clickhouse_common_io) diff --git a/src/Core/examples/coro.cpp b/src/Core/examples/coro.cpp deleted file mode 100644 index fbccc261e9d..00000000000 --- a/src/Core/examples/coro.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#if defined(__clang__) -#include - -namespace std // NOLINT(cert-dcl58-cpp) -{ - using namespace experimental::coroutines_v1; // NOLINT(cert-dcl58-cpp) -} - -#if __has_warning("-Wdeprecated-experimental-coroutine") -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-experimental-coroutine" -#endif - -#else -#include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - - -template -struct suspend_value // NOLINT(readability-identifier-naming) -{ - constexpr bool await_ready() const noexcept { return true; } // NOLINT(readability-identifier-naming) - constexpr void await_suspend(std::coroutine_handle<>) const noexcept {} // NOLINT(readability-identifier-naming) - constexpr T await_resume() const noexcept // NOLINT(readability-identifier-naming) - { - std::cout << " ret " << val << std::endl; - return val; - } - - T val; -}; - -template -struct Task -{ - struct promise_type // NOLINT(readability-identifier-naming) - { - using coro_handle = std::coroutine_handle; - auto get_return_object() { return coro_handle::from_promise(*this); } // NOLINT(readability-identifier-naming) - auto 
initial_suspend() { return std::suspend_never(); } // NOLINT(readability-identifier-naming) - auto final_suspend() noexcept { return suspend_value{*r->value}; } // NOLINT(readability-identifier-naming) - //void return_void() {} - void return_value(T value_) { r->value = value_; } // NOLINT(readability-identifier-naming) - void unhandled_exception() // NOLINT(readability-identifier-naming) - { - DB::tryLogCurrentException("Logger"); - r->exception = std::current_exception(); // NOLINT(bugprone-throw-keyword-missing) - } - - explicit promise_type(std::string tag_) : tag(tag_) {} - ~promise_type() { std::cout << "~promise_type " << tag << std::endl; } - std::string tag; - coro_handle next; - Task * r = nullptr; - }; - - using coro_handle = std::coroutine_handle; - - bool await_ready() const noexcept { return false; } // NOLINT(readability-identifier-naming) - void await_suspend(coro_handle g) noexcept // NOLINT(readability-identifier-naming) - { - std::cout << " await_suspend " << my.promise().tag << std::endl; - std::cout << " g tag " << g.promise().tag << std::endl; - g.promise().next = my; - } - T await_resume() noexcept // NOLINT(readability-identifier-naming) - { - std::cout << " await_res " << my.promise().tag << std::endl; - return *value; - } - - Task(coro_handle handle) : my(handle), tag(handle.promise().tag) // NOLINT(google-explicit-constructor) - { - assert(handle); - my.promise().r = this; - std::cout << " Task " << tag << std::endl; - } - Task(Task &) = delete; - Task(Task &&rhs) noexcept : my(rhs.my), tag(rhs.tag) - { - rhs.my = {}; - std::cout << " Task&& " << tag << std::endl; - } - static bool resumeImpl(Task *r) - { - if (r->value) - return false; - - auto & next = r->my.promise().next; - - if (next) - { - if (resumeImpl(next.promise().r)) - return true; - next = {}; - } - - if (!r->value) - { - r->my.resume(); - if (r->exception) - std::rethrow_exception(r->exception); - } - return !r->value; - } - - bool resume() - { - return resumeImpl(this); - } - - T res() - { - return *value; - } - - ~Task() - { - std::cout << " ~Task " << tag << std::endl; - } - -private: - coro_handle my; - std::string tag; - std::optional value; - std::exception_ptr exception; -}; - -Task boo([[maybe_unused]] std::string tag) -{ - std::cout << "x" << std::endl; - co_await std::suspend_always(); - std::cout << StackTrace().toString(); - std::cout << "y" << std::endl; - co_return 1; -} - -Task bar([[maybe_unused]] std::string tag) -{ - std::cout << "a" << std::endl; - int res1 = co_await boo("boo1"); - std::cout << "b " << res1 << std::endl; - int res2 = co_await boo("boo2"); - if (res2 == 1) - throw DB::Exception(1, "hello"); - std::cout << "c " << res2 << std::endl; - co_return res1 + res2; // 1 + 1 = 2 -} - -Task foo([[maybe_unused]] std::string tag) -{ - std::cout << "Hello" << std::endl; - auto res1 = co_await bar("bar1"); - std::cout << "Coro " << res1 << std::endl; - auto res2 = co_await bar("bar2"); - std::cout << "World " << res2 << std::endl; - co_return res1 * res2; // 2 * 2 = 4 -} - -int main() -{ - Poco::AutoPtr app_channel(new Poco::ConsoleChannel(std::cerr)); - Poco::Logger::root().setChannel(app_channel); - Poco::Logger::root().setLevel("trace"); - - LOG_INFO(&Poco::Logger::get(""), "Starting"); - - try - { - auto t = foo("foo"); - std::cout << ".. started" << std::endl; - while (t.resume()) - std::cout << ".. yielded" << std::endl; - std::cout << ".. 
done: " << t.res() << std::endl; - } - catch (DB::Exception & e) - { - std::cout << "Got exception " << e.what() << std::endl; - std::cout << e.getStackTraceString() << std::endl; - } -} diff --git a/src/Core/fuzzers/CMakeLists.txt b/src/Core/fuzzers/CMakeLists.txt index 269217392e7..51db6fa0b53 100644 --- a/src/Core/fuzzers/CMakeLists.txt +++ b/src/Core/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp) -target_link_libraries (names_and_types_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (names_and_types_fuzzer PRIVATE dbms) diff --git a/src/Core/fuzzers/names_and_types_fuzzer.cpp b/src/Core/fuzzers/names_and_types_fuzzer.cpp index cc4a2920c66..6fdd8703014 100644 --- a/src/Core/fuzzers/names_and_types_fuzzer.cpp +++ b/src/Core/fuzzers/names_and_types_fuzzer.cpp @@ -1,18 +1,18 @@ -#include #include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - DB::ReadBufferFromMemory in(data, size); - DB::NamesAndTypesList res; - res.readText(in); + try + { + DB::ReadBufferFromMemory in(data, size); + DB::NamesAndTypesList res; + res.readText(in); + } + catch (...) + { + } return 0; } -catch (...) -{ - return 1; -} diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp index cbeb84ef2e7..a6d8763bfb8 100644 --- a/src/Core/tests/gtest_settings.cpp +++ b/src/Core/tests/gtest_settings.cpp @@ -121,7 +121,7 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetString) ASSERT_EQ(Field("decimal,datetime64"), setting); // comma with spaces - setting = " datetime64 , decimal "; + setting = " datetime64 , decimal "; /// bad punctuation is ok here ASSERT_TRUE(setting.changed); ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); @@ -166,4 +166,3 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString) ASSERT_TRUE(setting.changed); ASSERT_EQ(0, setting.value.getValue()); } - diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 3852ec5ada5..be323dc6786 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -38,7 +38,6 @@ #include #include -#include #include #include #include @@ -467,6 +466,10 @@ private: if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD + Context::getGlobalContextInstance()->handleCrash(); +#endif + /// Send crash report to developers (if configured) if (sig != SanitizerTrap) { @@ -498,7 +501,7 @@ private: } /// ClickHouse Keeper does not link to some part of Settings. -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. 
if (!query_id.empty()) { diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 7aa1e8ad1a0..952cf61d8e0 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index e38d339d088..81ab103be02 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -13,12 +12,11 @@ #include #include #include -#include #include "config.h" #include "config_version.h" -#if USE_SENTRY && !defined(CLICKHOUSE_PROGRAM_STANDALONE_BUILD) +#if USE_SENTRY && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) # include # include diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index b580ecc9592..be60886d74b 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -117,6 +117,33 @@ Field DataTypeAggregateFunction::getDefault() const return field; } +bool DataTypeAggregateFunction::strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type) +{ + const auto * lhs_state = typeid_cast(lhs_state_type.get()); + const auto * rhs_state = typeid_cast(rhs_state_type.get()); + + if (!lhs_state || !rhs_state) + return false; + + if (lhs_state->function->getName() != rhs_state->function->getName()) + return false; + + if (lhs_state->parameters.size() != rhs_state->parameters.size()) + return false; + + for (size_t i = 0; i < lhs_state->parameters.size(); ++i) + if (lhs_state->parameters[i] != rhs_state->parameters[i]) + return false; + + if (lhs_state->argument_types.size() != rhs_state->argument_types.size()) + return false; + + for (size_t i = 0; i < lhs_state->argument_types.size(); ++i) + if (!lhs_state->argument_types[i]->equals(*rhs_state->argument_types[i])) + return false; + + return true; +} bool DataTypeAggregateFunction::equals(const IDataType & rhs) const { @@ -126,34 +153,7 @@ bool DataTypeAggregateFunction::equals(const IDataType & rhs) const auto lhs_state_type = function->getNormalizedStateType(); auto rhs_state_type = typeid_cast(rhs).function->getNormalizedStateType(); - if (typeid(lhs_state_type.get()) != typeid(rhs_state_type.get())) - return false; - - if (const auto * lhs_state = typeid_cast(lhs_state_type.get())) - { - const auto & rhs_state = typeid_cast(*rhs_state_type); - - if (lhs_state->function->getName() != rhs_state.function->getName()) - return false; - - if (lhs_state->parameters.size() != rhs_state.parameters.size()) - return false; - - for (size_t i = 0; i < lhs_state->parameters.size(); ++i) - if (lhs_state->parameters[i] != rhs_state.parameters[i]) - return false; - - if (lhs_state->argument_types.size() != rhs_state.argument_types.size()) - return false; - - for (size_t i = 0; i < lhs_state->argument_types.size(); ++i) - if (!lhs_state->argument_types[i]->equals(*rhs_state.argument_types[i])) - return false; - - return true; - } - - return lhs_state_type->equals(*rhs_state_type); + return strictEquals(lhs_state_type, rhs_state_type); } diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 83c9f10f407..6331c23222f 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -60,6 +60,7 @@ public: Field getDefault() const override; + static bool strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & 
rhs_state_type); bool equals(const IDataType & rhs) const override; bool isParametric() const override { return true; } diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index b2c712b2f36..e31f10046b7 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace DB @@ -20,6 +21,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +using FieldType = Array; DataTypeArray::DataTypeArray(const DataTypePtr & nested_) @@ -33,7 +35,6 @@ MutableColumnPtr DataTypeArray::createColumn() const return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create()); } - Field DataTypeArray::getDefault() const { return Array(); diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 2714ca1d023..68b574b8ded 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -15,6 +16,8 @@ private: DataTypePtr nested; public: + using FieldType = Array; + using ColumnType = ColumnArray; static constexpr bool is_parametric = true; explicit DataTypeArray(const DataTypePtr & nested_); @@ -42,6 +45,7 @@ public: MutableColumnPtr createColumn() const override; + Field getDefault() const override; bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index f6d8d07a312..d2a414cb073 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -46,6 +46,7 @@ public: bool canBeUsedInBooleanContext() const override { return dictionary_type->canBeUsedInBooleanContext(); } bool isValueRepresentedByNumber() const override { return dictionary_type->isValueRepresentedByNumber(); } bool isValueRepresentedByInteger() const override { return dictionary_type->isValueRepresentedByInteger(); } + bool isValueRepresentedByUnsignedInteger() const override { return dictionary_type->isValueRepresentedByUnsignedInteger(); } bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; } bool haveMaximumSizeOfValue() const override { return dictionary_type->haveMaximumSizeOfValue(); } size_t getMaximumSizeOfValueInMemory() const override { return dictionary_type->getMaximumSizeOfValueInMemory(); } diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index ea05e6ae59b..0bf3f3ac8b3 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -37,6 +37,7 @@ public: bool canBeInsideNullable() const override { return false; } bool supportsSparseSerialization() const override { return true; } + bool canBeInsideSparseColumns() const override { return false; } MutableColumnPtr createColumn() const override; MutableColumnPtr createColumn(const ISerialization & serialization) const override; diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index fa044d4ac9c..6529ce09456 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -74,21 +74,30 @@ SerializationPtr DataTypeDecimal::doGetDefaultSerialization() const static DataTypePtr create(const ASTPtr & arguments) { - if (!arguments || arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Decimal data type family must have exactly two arguments: precision and scale"); + UInt64 precision = 10; + UInt64 scale = 0; + if (arguments) + { + if 
(arguments->children.empty() || arguments->children.size() > 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Decimal data type family must have precision and optional scale arguments"); - const auto * precision = arguments->children[0]->as(); - const auto * scale = arguments->children[1]->as(); + const auto * precision_arg = arguments->children[0]->as(); + if (!precision_arg || precision_arg->value.getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument precision is invalid"); + precision = precision_arg->value.get(); - if (!precision || precision->value.getType() != Field::Types::UInt64 || - !scale || !(scale->value.getType() == Field::Types::Int64 || scale->value.getType() == Field::Types::UInt64)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal data type family must have two numbers as its arguments"); + if (arguments->children.size() == 2) + { + const auto * scale_arg = arguments->children[1]->as(); + if (!scale_arg || !isInt64OrUInt64FieldType(scale_arg->value.getType())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument scale is invalid"); + scale = scale_arg->value.get(); + } + } - UInt64 precision_value = precision->value.get(); - UInt64 scale_value = scale->value.get(); - - return createDecimal(precision_value, scale_value); + return createDecimal(precision, scale); } template diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 2e6628adcf3..5189f7a56f5 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -13,7 +13,7 @@ namespace ErrorCodes } template -class EnumValues : public IHints<1, EnumValues> +class EnumValues : public IHints<> { public: using Value = std::pair; diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 210dab9921e..837aae6753a 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -136,8 +136,17 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const DataTypes element_types; element_types.reserve(x.size()); + bool has_signed_int = false; + bool uint64_convert_possible = true; for (const Field & elem : x) - element_types.emplace_back(applyVisitor(*this, elem)); + { + DataTypePtr type = applyVisitor(*this, elem); + element_types.emplace_back(type); + checkUInt64ToIn64Conversion(has_signed_int, uint64_convert_possible, type, elem); + } + + if (has_signed_int && uint64_convert_possible) + convertUInt64ToInt64IfPossible(element_types); return std::make_shared(getLeastSupertype(element_types)); } @@ -165,14 +174,28 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const key_types.reserve(map.size()); value_types.reserve(map.size()); + bool k_has_signed_int = false; + bool k_uint64_convert_possible = true; + bool v_has_signed_int = false; + bool v_uint64_convert_possible = true; for (const auto & elem : map) { const auto & tuple = elem.safeGet(); assert(tuple.size() == 2); - key_types.push_back(applyVisitor(*this, tuple[0])); - value_types.push_back(applyVisitor(*this, tuple[1])); + DataTypePtr k_type = applyVisitor(*this, tuple[0]); + key_types.push_back(k_type); + checkUInt64ToIn64Conversion(k_has_signed_int, k_uint64_convert_possible, k_type, tuple[0]); + DataTypePtr v_type = applyVisitor(*this, tuple[1]); + value_types.push_back(v_type); + checkUInt64ToIn64Conversion(v_has_signed_int, v_uint64_convert_possible, v_type, tuple[1]); } + if (k_has_signed_int && k_uint64_convert_possible) + 
convertUInt64ToInt64IfPossible(key_types); + + if (v_has_signed_int && v_uint64_convert_possible) + convertUInt64ToInt64IfPossible(value_types); + return std::make_shared( getLeastSupertype(key_types), getLeastSupertype(value_types)); @@ -204,6 +227,28 @@ DataTypePtr FieldToDataType::operator()(const bool &) const return DataTypeFactory::instance().get("Bool"); } +template +void FieldToDataType::checkUInt64ToIn64Conversion(bool & has_signed_int, bool & uint64_convert_possible, const DataTypePtr & type, const Field & elem) const +{ + if (uint64_convert_possible) + { + bool is_native_int = WhichDataType(type).isNativeInt(); + + if (is_native_int) + has_signed_int |= is_native_int; + else if (type->getTypeId() == TypeIndex::UInt64) + uint64_convert_possible &= (elem.template get() <= std::numeric_limits::max()); + } +} + +template +void FieldToDataType::convertUInt64ToInt64IfPossible(DataTypes & data_types) const +{ + for (auto& type : data_types) + if (type->getTypeId() == TypeIndex::UInt64) + type = std::make_shared(); +} + template class FieldToDataType; template class FieldToDataType; template class FieldToDataType; diff --git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h index 8febadc1a0d..d1a3f11e8de 100644 --- a/src/DataTypes/FieldToDataType.h +++ b/src/DataTypes/FieldToDataType.h @@ -45,6 +45,16 @@ public: DataTypePtr operator() (const UInt256 & x) const; DataTypePtr operator() (const Int256 & x) const; DataTypePtr operator() (const bool & x) const; + +private: + // The conditions for converting UInt64 to Int64 are: + // 1. The existence of Int. + // 2. The existence of UInt64, and the UInt64 value must be <= Int64.max. + void checkUInt64ToIn64Conversion(bool& has_signed_int, bool& uint64_convert_possible, const DataTypePtr & type, const Field & elem) const; + + // Convert the UInt64 type to Int64 in order to cover other signed_integer types + // and obtain the least super type of all ints. + void convertUInt64ToInt64IfPossible(DataTypes & data_types) const; }; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 4adafe5d212..54cb3d0d5c2 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -110,6 +110,7 @@ public: /// TODO: support more types. 
virtual bool supportsSparseSerialization() const { return !haveSubtypes(); } + virtual bool canBeInsideSparseColumns() const { return supportsSparseSerialization(); } SerializationPtr getDefaultSerialization() const; SerializationPtr getSparseSerialization() const; diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h index 6b068b0d8b1..cf283d3358c 100644 --- a/src/DataTypes/NumberTraits.h +++ b/src/DataTypes/NumberTraits.h @@ -174,7 +174,7 @@ template struct ResultOfBitNot * Float, [U]Int -> Float * Decimal, Decimal -> Decimal * UUID, UUID -> UUID - * UInt64 , Int -> Error + * UInt64, Int -> Error * Float, [U]Int64 -> Error */ template diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 782b890841a..e70dc6a2380 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -261,6 +261,12 @@ void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, con deserializeWholeText(column, buf, settings); } +void ISerialization::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + void ISerialization::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 17e6dfb85bc..ed1ad037ea0 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -366,6 +366,8 @@ public: virtual void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; virtual void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; + virtual void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; + static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path); static String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path); static String getSubcolumnNameForStream(const SubstreamPath & path); diff --git a/src/DataTypes/Serializations/PathInData.cpp b/src/DataTypes/Serializations/PathInData.cpp index 2a5f508650f..cf78d7cbb14 100644 --- a/src/DataTypes/Serializations/PathInData.cpp +++ b/src/DataTypes/Serializations/PathInData.cpp @@ -65,9 +65,7 @@ UInt128 PathInData::getPartsHash(const Parts::const_iterator & begin, const Part hash.update(part_it->anonymous_array_level); } - UInt128 res; - hash.get128(res); - return res; + return hash.get128(); } void PathInData::buildPath(const Parts & other_parts) diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index cedcca870dd..d6f36e45e64 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -129,7 +129,7 @@ namespace for (size_t i = offset; i < end; ++i) { ColumnArray::Offset current_offset = offset_values[i]; - writeIntBinary(current_offset - prev_offset, ostr); + writeBinaryLittleEndian(current_offset - prev_offset, ostr); prev_offset = current_offset; } } @@ -145,7 +145,7 @@ namespace while (i < initial_size + limit && !istr.eof()) { 
ColumnArray::Offset current_size = 0; - readIntBinary(current_size, istr); + readBinaryLittleEndian(current_size, istr); if (unlikely(current_size > MAX_ARRAY_SIZE)) throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size is too large: {}", current_size); @@ -493,7 +493,10 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co deserializeTextImpl(column, istr, [&](IColumn & nested_column) { - nested->deserializeTextQuoted(nested_column, istr, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextQuotedImpl(nested_column, istr, settings, nested); + else + nested->deserializeTextQuoted(nested_column, istr, settings); }, false); if (whole && !istr.eof()) @@ -604,7 +607,10 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, deserializeTextImpl(column, rb, [&](IColumn & nested_column) { - nested->deserializeTextCSV(nested_column, rb, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextCSVImpl(nested_column, rb, settings, nested); + else + nested->deserializeTextCSV(nested_column, rb, settings); }, true); } else @@ -612,7 +618,10 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, deserializeTextImpl(column, rb, [&](IColumn & nested_column) { - nested->deserializeTextQuoted(nested_column, rb, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextQuotedImpl(nested_column, rb, settings, nested); + else + nested->deserializeTextQuoted(nested_column, rb, settings); }, true); } } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp index c35e1120ce8..d094341b166 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp @@ -94,4 +94,13 @@ void SerializationCustomSimpleText::serializeTextXML(const IColumn & column, siz writeXMLStringForTextElement(serializeToString(*this, column, row_num, settings), ostr); } +void SerializationCustomSimpleText::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + writeMarkdownEscapedString(serializeToString(*this, column, row_num, settings), ostr); + else + writeEscapedString(serializeToString(*this, column, row_num, settings), ostr); +} + } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/src/DataTypes/Serializations/SerializationCustomSimpleText.h index 21d6f8af650..0c909350002 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.h +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.h @@ -54,6 +54,8 @@ public: /** Text serialization for putting into the XML format. 
*/ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index 7238d3ce190..2ba24f5351b 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB { @@ -145,12 +146,29 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is char maybe_quote = *istr.position(); if (maybe_quote == '\'' || maybe_quote == '\"') + { ++istr.position(); - - readText(x, istr, settings, time_zone, utc_time_zone); - - if (maybe_quote == '\'' || maybe_quote == '\"') + readText(x, istr, settings, time_zone, utc_time_zone); assertChar(maybe_quote, istr); + } + else + { + if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic) + { + readText(x, istr, settings, time_zone, utc_time_zone); + } + /// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00" + /// and can mistakenly read comma as a part of datetime. + /// For example data "...,01.01.2000,some string,..." cannot be parsed correctly. + /// To fix this problem we first read CSV string and then try to parse it as datetime. + else + { + String datetime_str; + readCSVString(datetime_str, istr, settings.csv); + ReadBufferFromString buf(datetime_str); + readText(x, buf, settings, time_zone, utc_time_zone); + } + } if (x < 0) x = 0; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index 78c7ea56529..c5964f1bd97 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -143,12 +144,29 @@ void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & char maybe_quote = *istr.position(); if (maybe_quote == '\'' || maybe_quote == '\"') + { ++istr.position(); - - readText(x, scale, istr, settings, time_zone, utc_time_zone); - - if (maybe_quote == '\'' || maybe_quote == '\"') + readText(x, scale, istr, settings, time_zone, utc_time_zone); assertChar(maybe_quote, istr); + } + else + { + if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic) + { + readText(x, scale, istr, settings, time_zone, utc_time_zone); + } + /// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00" + /// and can mistakenly read comma as a part of datetime. + /// For example data "...,01.01.2000,some string,..." cannot be parsed correctly. + /// To fix this problem we first read CSV string and then try to parse it as datetime. 
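The comment block above describes the workaround: instead of letting the best-effort parser consume past a comma, the field is first extracted as a CSV string and only then parsed as a datetime. A small sketch of that bounding idea, with a hypothetical readCsvField helper standing in for readCSVString:

#include <iostream>
#include <sstream>
#include <string>

// Hypothetical helper: read one unquoted CSV field up to the delimiter.
static std::string readCsvField(std::istream & in, char delimiter = ',')
{
    std::string field;
    std::getline(in, field, delimiter);
    return field;
}

int main()
{
    std::istringstream row("01.01.2000,some string,123");
    // Best-effort parsing accepts "01.01.2000, 00:00:00", so a parser run directly on the
    // stream could swallow ",some string" as a time part; extracting the CSV field first
    // bounds the parse to "01.01.2000".
    std::string datetime_str = readCsvField(row);
    std::cout << datetime_str << '\n';        // 01.01.2000
    std::cout << readCsvField(row) << '\n';   // some string
}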
+ else + { + String datetime_str; + readCSVString(datetime_str, istr, settings.csv); + ReadBufferFromString buf(datetime_str); + readText(x, scale, buf, settings, time_zone, utc_time_zone); + } + } assert_cast(column).getData().push_back(x); } diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/src/DataTypes/Serializations/SerializationDecimalBase.cpp index 642ea1c7cd8..b7f91e6833e 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.cpp +++ b/src/DataTypes/Serializations/SerializationDecimalBase.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -15,34 +16,35 @@ template void SerializationDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { FieldType x = field.get>(); - writeBinary(x, ostr); + writeBinaryLittleEndian(x, ostr); } template void SerializationDecimalBase::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { const FieldType & x = assert_cast(column).getElement(row_num); - writeBinary(x, ostr); + writeBinaryLittleEndian(x, ostr); } template void SerializationDecimalBase::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { const typename ColumnType::Container & x = typeid_cast(column).getData(); - - size_t size = x.size(); - - if (limit == 0 || offset + limit > size) + if (const size_t size = x.size(); limit == 0 || offset + limit > size) limit = size - offset; - ostr.write(reinterpret_cast(&x[offset]), sizeof(FieldType) * limit); + if constexpr (std::endian::native == std::endian::big) + for (size_t i = offset; i < offset + limit; ++i) + writeBinaryLittleEndian(x[i], ostr); + else + ostr.write(reinterpret_cast(&x[offset]), sizeof(FieldType) * limit); } template void SerializationDecimalBase::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { typename FieldType::NativeType x; - readBinary(x, istr); + readBinaryLittleEndian(x, istr); field = DecimalField(T(x), this->scale); } @@ -50,7 +52,7 @@ template void SerializationDecimalBase::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { typename FieldType::NativeType x; - readBinary(x, istr); + readBinaryLittleEndian(x, istr); assert_cast(column).getData().push_back(FieldType(x)); } @@ -58,10 +60,14 @@ template void SerializationDecimalBase::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const { typename ColumnType::Container & x = typeid_cast(column).getData(); - size_t initial_size = x.size(); + const size_t initial_size = x.size(); x.resize(initial_size + limit); - size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); + const size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); x.resize(initial_size + size / sizeof(FieldType)); + + if constexpr (std::endian::native == std::endian::big) + for (size_t i = initial_size; i < x.size(); ++i) + transformEndianness(x[i]); } template class SerializationDecimalBase; diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index a1bd63d4327..46bd5e4c04a 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -111,6 +111,16 @@ void SerializationEnum::deserializeTextCSV(IColumn & column, ReadBuffer & } } +template +void SerializationEnum::serializeTextMarkdown( + const IColumn & column, size_t 
row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + writeMarkdownEscapedString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + else + serializeTextEscaped(column, row_num, ostr, settings); +} + template class SerializationEnum; template class SerializationEnum; diff --git a/src/DataTypes/Serializations/SerializationEnum.h b/src/DataTypes/Serializations/SerializationEnum.h index bdd769b59c5..49a0e4943e0 100644 --- a/src/DataTypes/Serializations/SerializationEnum.h +++ b/src/DataTypes/Serializations/SerializationEnum.h @@ -29,6 +29,8 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + FieldType readValue(ReadBuffer & istr) const { FieldType x; diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index 3b405f6ec08..1cf8328f033 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -210,5 +210,16 @@ void SerializationFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); }); } +void SerializationFixedString::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + writeMarkdownEscapedString( + reinterpret_cast(&(assert_cast(column).getChars()[n * row_num])), n, ostr); + } + else + serializeTextEscaped(column, row_num, ostr, settings); +} } diff --git a/src/DataTypes/Serializations/SerializationFixedString.h b/src/DataTypes/Serializations/SerializationFixedString.h index 3db31ab02cb..c27b10ad158 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.h +++ b/src/DataTypes/Serializations/SerializationFixedString.h @@ -41,6 +41,8 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + /// Makes sure that the length of a newly inserted string to `chars` is equal to getN(). /// If the length is less than getN() the function will add zero characters up to getN(). /// If the length is greater than getN() the function will throw an exception. 
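The serializeTextMarkdown overrides above all follow the same shape: escape Markdown-significant characters when output_format_markdown_escape_special_characters is set, otherwise fall back to the plain escaped text form. A rough sketch of that kind of escaping; the exact character set handled by writeMarkdownEscapedString may differ:

#include <iostream>
#include <string>
#include <string_view>

static std::string markdownEscape(std::string_view s)
{
    // Backslash-escape characters that are significant in Markdown tables/markup.
    static constexpr std::string_view special = "\\`*_{}[]()#+-.!|<>";
    std::string out;
    out.reserve(s.size());
    for (char c : s)
    {
        if (special.find(c) != std::string_view::npos)
            out += '\\';
        out += c;
    }
    return out;
}

int main()
{
    std::cout << markdownEscape("a|b_c*d") << '\n';   // a\|b\_c\*d
}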
diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index aba77c8d431..3e1cbdb00f5 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -132,13 +132,13 @@ struct IndexesSerializationType val |= NeedGlobalDictionaryBit; if (need_update_dictionary) val |= NeedUpdateDictionary; - writeIntBinary(val, buffer); + writeBinaryLittleEndian(val, buffer); } void deserialize(ReadBuffer & buffer, const ISerialization::DeserializeBinaryBulkSettings & settings) { SerializationType val; - readIntBinary(val, buffer); + readBinaryLittleEndian(val, buffer); checkType(val); has_additional_keys = (val & HasAdditionalKeysBit) != 0; @@ -235,7 +235,7 @@ void SerializationLowCardinality::serializeBinaryBulkStatePrefix( /// Write version and create SerializeBinaryBulkState. UInt64 key_version = KeysSerializationVersion::SharedDictionariesWithAdditionalKeys; - writeIntBinary(key_version, *stream); + writeBinaryLittleEndian(key_version, *stream); state = std::make_shared(key_version); } @@ -259,7 +259,7 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream in SerializationLowCardinality::serializeBinaryBulkStateSuffix"); UInt64 num_keys = nested_column->size(); - writeIntBinary(num_keys, *stream); + writeBinaryLittleEndian(num_keys, *stream); dict_inner_serialization->serializeBinaryBulk(*nested_column, *stream, 0, num_keys); low_cardinality_state->shared_dictionary = nullptr; } @@ -277,7 +277,7 @@ void SerializationLowCardinality::deserializeBinaryBulkStatePrefix( return; UInt64 keys_version; - readIntBinary(keys_version, *stream); + readBinaryLittleEndian(keys_version, *stream); state = std::make_shared(keys_version); } @@ -492,7 +492,7 @@ void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams( { const auto & nested_column = global_dictionary->getNestedNotNullableColumn(); UInt64 num_keys = nested_column->size(); - writeIntBinary(num_keys, *keys_stream); + writeBinaryLittleEndian(num_keys, *keys_stream); dict_inner_serialization->serializeBinaryBulk(*nested_column, *keys_stream, 0, num_keys); low_cardinality_state->shared_dictionary = nullptr; } @@ -500,12 +500,12 @@ void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams( if (need_additional_keys) { UInt64 num_keys = keys->size(); - writeIntBinary(num_keys, *indexes_stream); + writeBinaryLittleEndian(num_keys, *indexes_stream); dict_inner_serialization->serializeBinaryBulk(*keys, *indexes_stream, 0, num_keys); } UInt64 num_rows = positions->size(); - writeIntBinary(num_rows, *indexes_stream); + writeBinaryLittleEndian(num_rows, *indexes_stream); auto index_serialization = index_version.getDataType()->getDefaultSerialization(); index_serialization->serializeBinaryBulk(*positions, *indexes_stream, 0, num_rows); } @@ -541,7 +541,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( auto read_dictionary = [this, low_cardinality_state, keys_stream]() { UInt64 num_keys; - readIntBinary(num_keys, *keys_stream); + readBinaryLittleEndian(num_keys, *keys_stream); auto keys_type = removeNullable(dictionary_type); auto global_dict_keys = keys_type->createColumn(); @@ -554,7 +554,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( auto read_additional_keys = [this, low_cardinality_state, indexes_stream]() { UInt64 num_keys; - 
readIntBinary(num_keys, *indexes_stream); + readBinaryLittleEndian(num_keys, *indexes_stream); auto keys_type = removeNullable(dictionary_type); auto additional_keys = keys_type->createColumn(); @@ -660,7 +660,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( else low_cardinality_state->additional_keys = nullptr; - readIntBinary(low_cardinality_state->num_pending_rows, *indexes_stream); + readBinaryLittleEndian(low_cardinality_state->num_pending_rows, *indexes_stream); } size_t num_rows_to_read = std::min(limit, low_cardinality_state->num_pending_rows); diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index af1d96c4ca7..7588e630689 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -192,7 +192,10 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons deserializeTextImpl(column, istr, [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) { - subcolumn_serialization->deserializeTextQuoted(subcolumn, buf, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextQuotedImpl(subcolumn, buf, settings, subcolumn_serialization); + else + subcolumn_serialization->deserializeTextQuoted(subcolumn, buf, settings); }); if (whole && !istr.eof()) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 6e4402740d9..15203bdc9fa 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -189,10 +189,10 @@ void SerializationNullable::serializeBinary(const IColumn & column, size_t row_n /// Deserialize value into ColumnNullable. /// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all. -template , ReturnType>* = nullptr> -static ReturnType safeDeserialize( - IColumn & column, const ISerialization &, - CheckForNull && check_for_null, DeserializeNested && deserialize_nested) +template +requires std::same_as +static ReturnType +safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested) { ColumnNullable & col = assert_cast(column); @@ -217,10 +217,10 @@ static ReturnType safeDeserialize( } /// Deserialize value into non-nullable column. In case of NULL, insert default value and return false. -template , ReturnType>* = nullptr> -static ReturnType safeDeserialize( - IColumn & column, const ISerialization &, - CheckForNull && check_for_null, DeserializeNested && deserialize_nested) +template +requires std::same_as +static ReturnType +safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested) { bool insert_default = check_for_null(); if (insert_default) diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp b/src/DataTypes/Serializations/SerializationNumber.cpp index 8cabaec753d..94b44d5cc66 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -106,28 +108,28 @@ void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & { /// ColumnVector::ValueType is a narrower type. 
For example, UInt8, when the Field type is UInt64 typename ColumnVector::ValueType x = static_cast::ValueType>(field.get()); - writeBinary(x, ostr); + writeBinaryLittleEndian(x, ostr); } template void SerializationNumber::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { typename ColumnVector::ValueType x; - readBinary(x, istr); + readBinaryLittleEndian(x, istr); field = NearestFieldType(x); } template void SerializationNumber::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeBinary(assert_cast &>(column).getData()[row_num], ostr); + writeBinaryLittleEndian(assert_cast &>(column).getData()[row_num], ostr); } template void SerializationNumber::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { typename ColumnVector::ValueType x; - readBinary(x, istr); + readBinaryLittleEndian(x, istr); assert_cast &>(column).getData().push_back(x); } @@ -135,13 +137,16 @@ template void SerializationNumber::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { const typename ColumnVector::Container & x = typeid_cast &>(column).getData(); - - size_t size = x.size(); - - if (limit == 0 || offset + limit > size) + if (const size_t size = x.size(); limit == 0 || offset + limit > size) limit = size - offset; - if (limit) + if (limit == 0) + return; + + if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2) + for (size_t i = offset; i < offset + limit; ++i) + writeBinaryLittleEndian(x[i], ostr); + else ostr.write(reinterpret_cast(&x[offset]), sizeof(typename ColumnVector::ValueType) * limit); } @@ -149,10 +154,14 @@ template void SerializationNumber::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const { typename ColumnVector::Container & x = typeid_cast &>(column).getData(); - size_t initial_size = x.size(); + const size_t initial_size = x.size(); x.resize(initial_size + limit); - size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(typename ColumnVector::ValueType) * limit); + const size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(typename ColumnVector::ValueType) * limit); x.resize(initial_size + size / sizeof(typename ColumnVector::ValueType)); + + if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2) + for (size_t i = initial_size; i < x.size(); ++i) + transformEndianness(x[i]); } template class SerializationNumber; diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index df9489213c8..8bfcb6d4e71 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -513,6 +513,22 @@ void SerializationObject::serializeTextCSV(const IColumn & column, size_ writeCSVString(ostr_str.str(), ostr); } +template +void SerializationObject::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + WriteBufferFromOwnString ostr_str; + serializeTextImpl(column, row_num, ostr_str, settings); + writeMarkdownEscapedString(ostr_str.str(), ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} + template void SerializationObject::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const 
FormatSettings & settings, size_t indent) const { diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index de54f5739f5..39e1c514640 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -68,6 +68,7 @@ public: void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 46fd9d5272d..28ebdde3258 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -361,5 +361,13 @@ void SerializationString::deserializeTextCSV(IColumn & column, ReadBuffer & istr read(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); }); } +void SerializationString::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + writeMarkdownEscapedString(assert_cast(column).getDataAt(row_num).toView(), ostr); + else + serializeTextEscaped(column, row_num, ostr, settings); +} } diff --git a/src/DataTypes/Serializations/SerializationString.h b/src/DataTypes/Serializations/SerializationString.h index f27a5116c15..cd4cdf79c11 100644 --- a/src/DataTypes/Serializations/SerializationString.h +++ b/src/DataTypes/Serializations/SerializationString.h @@ -32,6 +32,8 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 7f3e7619b0d..5c9487b97d4 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -135,7 +135,10 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co assertChar(',', istr); skipWhitespaceIfAny(istr); } - elems[i]->deserializeTextQuoted(extractElementColumn(column, i), istr, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextQuotedImpl(extractElementColumn(column, i), istr, settings, elems[i]); + else + elems[i]->deserializeTextQuoted(extractElementColumn(column, i), istr, settings); } // Special format for one element tuple (1,) @@ -366,7 +369,10 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, 
assertChar(settings.csv.tuple_delimiter, istr); skipWhitespaceIfAny(istr); } - elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextCSVImpl(extractElementColumn(column, i), istr, settings, elems[i]); + else + elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings); } }); } diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 76be273d7dc..8e8b80cf9d1 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -111,48 +112,62 @@ void SerializationUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { UUID x = field.get(); - writeBinary(x, ostr); + writeBinaryLittleEndian(x, ostr); } void SerializationUUID::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { UUID x; - readBinary(x, istr); + readBinaryLittleEndian(x, istr); field = NearestFieldType(x); } void SerializationUUID::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeBinary(assert_cast &>(column).getData()[row_num], ostr); + writeBinaryLittleEndian(assert_cast &>(column).getData()[row_num], ostr); } void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; - readBinary(x, istr); + readBinaryLittleEndian(x, istr); assert_cast &>(column).getData().push_back(x); } void SerializationUUID::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { const typename ColumnVector::Container & x = typeid_cast &>(column).getData(); - - size_t size = x.size(); - - if (limit == 0 || offset + limit > size) + if (const size_t size = x.size(); limit == 0 || offset + limit > size) limit = size - offset; - if (limit) + if (limit == 0) + return; + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native == std::endian::big) + { + for (size_t i = offset; i < offset + limit; ++i) + writeBinaryLittleEndian(x[i], ostr); + } + else ostr.write(reinterpret_cast(&x[offset]), sizeof(UUID) * limit); +#pragma clang diagnostic pop } void SerializationUUID::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const { typename ColumnVector::Container & x = typeid_cast &>(column).getData(); - size_t initial_size = x.size(); + const size_t initial_size = x.size(); x.resize(initial_size + limit); - size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(UUID) * limit); + const size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(UUID) * limit); x.resize(initial_size + size / sizeof(UUID)); -} +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native == std::endian::big) + for (size_t i = initial_size; i < x.size(); ++i) + transformEndianness(x[i]); +#pragma clang diagnostic pop +} } diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index bf922888af9..31900f93148 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ 
b/src/DataTypes/Serializations/SerializationWrapper.h @@ -77,7 +77,6 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; - void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; }; diff --git a/src/DataTypes/fuzzers/CMakeLists.txt b/src/DataTypes/fuzzers/CMakeLists.txt index d9c19cb7d01..939bf5f5e3f 100644 --- a/src/DataTypes/fuzzers/CMakeLists.txt +++ b/src/DataTypes/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions ${LIB_FUZZING_ENGINE}) +target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index 31e4c470ee7..e40734e0a57 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -14,69 +14,70 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - using namespace DB; - - static SharedContextHolder shared_context; - static ContextMutablePtr context; - - auto initialize = [&]() mutable + try { - shared_context = Context::createShared(); - context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - context->setApplicationType(Context::ApplicationType::LOCAL); + using namespace DB; - MainThreadStatus::getInstance(); + static SharedContextHolder shared_context; + static ContextMutablePtr context; - registerAggregateFunctions(); - return true; - }; + auto initialize = [&]() mutable + { + shared_context = Context::createShared(); + context = Context::createGlobal(shared_context.get()); + context->makeGlobalContext(); + context->setApplicationType(Context::ApplicationType::LOCAL); - static bool initialized = initialize(); - (void) initialized; + MainThreadStatus::getInstance(); - total_memory_tracker.resetCounters(); - total_memory_tracker.setHardLimit(1_GiB); - CurrentThread::get().memory_tracker.resetCounters(); - CurrentThread::get().memory_tracker.setHardLimit(1_GiB); + registerAggregateFunctions(); + return true; + }; - /// The input format is as follows: - /// - data type name on the first line, - /// - the data for the rest of the input. + static bool initialized = initialize(); + (void) initialized; - /// Compile the code as follows: - /// mkdir build_asan_fuzz - /// cd build_asan_fuzz - /// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 .. 
- /// - /// The corpus is located here: - /// https://github.com/ClickHouse/fuzz-corpus/tree/main/data_type_deserialization - /// - /// The fuzzer can be run as follows: - /// ../../../build_asan_fuzz/src/DataTypes/fuzzers/data_type_deserialization_fuzzer corpus -jobs=64 -rss_limit_mb=8192 + total_memory_tracker.resetCounters(); + total_memory_tracker.setHardLimit(1_GiB); + CurrentThread::get().memory_tracker.resetCounters(); + CurrentThread::get().memory_tracker.setHardLimit(1_GiB); - /// clickhouse-local --query "SELECT toJSONString(*) FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.data_type_families)" > dictionary + /// The input format is as follows: + /// - data type name on the first line, + /// - the data for the rest of the input. - DB::ReadBufferFromMemory in(data, size); + /// Compile the code as follows: + /// mkdir build_asan_fuzz + /// cd build_asan_fuzz + /// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 .. + /// + /// The corpus is located here: + /// https://github.com/ClickHouse/fuzz-corpus/tree/main/data_type_deserialization + /// + /// The fuzzer can be run as follows: + /// ../../../build_asan_fuzz/src/DataTypes/fuzzers/data_type_deserialization_fuzzer corpus -jobs=64 -rss_limit_mb=8192 - String data_type; - readStringUntilNewlineInto(data_type, in); - assertChar('\n', in); + /// clickhouse-local --query "SELECT toJSONString(*) FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.data_type_families)" > dictionary - DataTypePtr type = DataTypeFactory::instance().get(data_type); + DB::ReadBufferFromMemory in(data, size); - FormatSettings settings; - settings.max_binary_string_size = 100; - settings.max_binary_array_size = 100; + String data_type; + readStringUntilNewlineInto(data_type, in); + assertChar('\n', in); - Field field; - type->getDefaultSerialization()->deserializeBinary(field, in, settings); + DataTypePtr type = DataTypeFactory::instance().get(data_type); + + FormatSettings settings; + settings.max_binary_string_size = 100; + settings.max_binary_array_size = 100; + + Field field; + type->getDefaultSerialization()->deserializeBinary(field, in, settings); + } + catch (...) + { + } return 0; } -catch (...) 
-{ - return 1; -} diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 8080179ad47..2b5c4a0a143 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -22,11 +22,11 @@ enum class ArgumentKind Mandatory }; -String getExceptionMessage( +PreformattedMessage getExceptionMessage( const String & message, size_t argument_index, const char * argument_name, const std::string & context_data_type_name, Field::Types::Which field_type) { - return fmt::format("Parameter #{} '{}' for {}{}, expected {} literal", + return PreformattedMessage::create("Parameter #{} '{}' for {}{}, expected {} literal", argument_index, argument_name, context_data_type_name, message, field_type); } @@ -47,10 +47,10 @@ getArgument(const ASTPtr & arguments, size_t argument_index, const char * argume else { if (argument && argument->value.getType() != field_type) - throw Exception::createDeprecated(getExceptionMessage(fmt::format(" has wrong type: {}", argument->value.getTypeName()), + throw Exception(getExceptionMessage(fmt::format(" has wrong type: {}", argument->value.getTypeName()), argument_index, argument_name, context_data_type_name, field_type), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); else - throw Exception::createDeprecated(getExceptionMessage(" is missing", argument_index, argument_name, context_data_type_name, field_type), + throw Exception(getExceptionMessage(" is missing", argument_index, argument_name, context_data_type_name, field_type), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } } @@ -67,7 +67,7 @@ static DataTypePtr create(const ASTPtr & arguments) const auto timezone = getArgument(arguments, scale ? 1 : 0, "timezone", "DateTime"); if (!scale && !timezone) - throw Exception::createDeprecated(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64), + throw Exception(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); /// If scale is defined, the data type is DateTime when scale = 0 otherwise the data type is DateTime64 diff --git a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp index b755bd109d0..0373e55a62d 100644 --- a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp +++ b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 0f65069db35..bbab279c0e2 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -471,8 +471,16 @@ void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & { String link = path_to_table_symlinks + escapeForFileName(table_name); fs::path data = fs::canonical(getContext()->getPath()) / actual_data_path; - if (!if_data_path_exist || fs::exists(data)) - fs::create_directory_symlink(data, link); + + /// If it already points where needed. + std::error_code ec; + if (fs::equivalent(data, link, ec)) + return; + + if (if_data_path_exist && !fs::exists(data)) + return; + + fs::create_directory_symlink(data, link); } catch (...) 
{ diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 9d90c61bb41..53d5245770e 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -107,9 +107,6 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m { cckMetadataPathForOrdinary(create, metadata_path); - /// Creates store/xxx/ for Atomic - fs::create_directories(fs::path(metadata_path).parent_path()); - DatabasePtr impl = getImpl(create, metadata_path, context); if (impl && context->hasQueryContext() && context->getSettingsRef().log_queries) diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 7eaf474eea0..49f260034db 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -11,9 +11,11 @@ #include #include #include +#include #include + namespace fs = std::filesystem; namespace DB @@ -75,28 +77,28 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont /// Check access for file before checking its existence. if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path)) { - if (throw_on_error) - throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); - else - return false; + /// Access denied is thrown regardless of 'throw_on_error' + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); } - /// Check if the corresponding file exists. - if (!fs::exists(table_path)) + if (!containsGlobs(table_path)) { - if (throw_on_error) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist: {}", table_path); - else - return false; - } + /// Check if the corresponding file exists. + if (!fs::exists(table_path)) + { + if (throw_on_error) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist: {}", table_path); + else + return false; + } - if (!fs::is_regular_file(table_path)) - { - if (throw_on_error) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, - "File is directory, but expected a file: {}", table_path); - else - return false; + if (!fs::is_regular_file(table_path)) + { + if (throw_on_error) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File is directory, but expected a file: {}", table_path); + else + return false; + } } return true; @@ -128,27 +130,31 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) if (tryGetTableFromCache(name)) return true; - return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */false); + return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */ false); } -StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const +StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_, bool throw_on_error) const { /// Check if table exists in loaded tables map. if (auto table = tryGetTableFromCache(name)) return table; auto table_path = getTablePath(name); - checkTableFilePath(table_path, context_, /* throw_on_error */true); + if (!checkTableFilePath(table_path, context_, throw_on_error)) + return {}; - /// If the file exists, create a new table using TableFunctionFile and return it. 
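In the DatabaseFilesystem changes above, the throw_on_error flag only softens "not found" style failures; a path outside user_files_path throws unconditionally, and glob paths skip the existence checks. A simplified illustration of that control flow, where the prefix test, the glob test, and the exception types are placeholders rather than the real ClickHouse helpers:

#include <filesystem>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

static bool checkTableFile(const std::string & path, const std::string & user_files, bool throw_on_error)
{
    if (path.rfind(user_files, 0) != 0)                               // crude "is inside" test for the sketch
        throw std::runtime_error("File is not inside " + user_files); // never downgraded to `false`

    if (path.find_first_of("*?{") != std::string::npos)               // globs: defer existence checks
        return true;

    if (!fs::exists(path) || !fs::is_regular_file(path))
    {
        if (throw_on_error)
            throw std::runtime_error("File does not exist: " + path);
        return false;
    }
    return true;
}

int main()
{
    return checkTableFile("/var/lib/clickhouse/user_files/data.csv", "/var/lib/clickhouse/user_files", false) ? 0 : 1;
}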
- auto args = makeASTFunction("file", std::make_shared(table_path)); + auto format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error); + if (format.empty()) + return {}; - auto table_function = TableFunctionFactory::instance().get(args, context_); + auto ast_function_ptr = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); + + auto table_function = TableFunctionFactory::instance().get(ast_function_ptr, context_); if (!table_function) return nullptr; /// TableFunctionFile throws exceptions, if table cannot be created. - auto table_storage = table_function->execute(args, context_, name); + auto table_storage = table_function->execute(ast_function_ptr, context_, name); if (table_storage) addTable(name, table_storage); @@ -158,7 +164,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const { /// getTableImpl can throw exceptions, do not catch them to show correct error to user. - if (auto storage = getTableImpl(name, context_)) + if (auto storage = getTableImpl(name, context_, true)) return storage; throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", @@ -167,20 +173,7 @@ StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const { - try - { - return getTableImpl(name, context_); - } - catch (const Exception & e) - { - /// Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table - /// see tests/02722_database_filesystem.sh for more details. - if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) - { - return nullptr; - } - throw; - } + return getTableImpl(name, context_, false); } bool DatabaseFilesystem::empty() const diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index 7fe620401dc..b72891b9a5c 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -48,7 +48,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; protected: - StoragePtr getTableImpl(const String & name, ContextPtr context) const; + StoragePtr getTableImpl(const String & name, ContextPtr context, bool throw_on_error) const; StoragePtr tryGetTableFromCache(const std::string & name) const; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d3b3d4b545f..17941d105ba 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -263,7 +263,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const { Strings paths; - const auto & addresses_with_failover = cluster->getShardsAddresses(); + const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { @@ -524,6 +524,7 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); + ddl_worker_initialized = true; } bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const @@ -665,7 +666,7 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ { for 
(const auto & command : query_alter->command_list->children) { - if (!isSupportedAlterType(command->as().type)) + if (!isSupportedAlterTypeForOnClusterDDLQuery(command->as().type)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query"); } } @@ -817,6 +818,32 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep query_context->setQueryKindReplicatedDatabaseInternal(); query_context->setCurrentDatabase(getDatabaseName()); query_context->setCurrentQueryId(""); + + /// We will execute some CREATE queries for recovery (not ATTACH queries), + /// so we need to allow experimental features that can be used in a CREATE query + query_context->setSetting("allow_experimental_inverted_index", 1); + query_context->setSetting("allow_experimental_codecs", 1); + query_context->setSetting("allow_experimental_live_view", 1); + query_context->setSetting("allow_experimental_window_view", 1); + query_context->setSetting("allow_experimental_funnel_functions", 1); + query_context->setSetting("allow_experimental_nlp_functions", 1); + query_context->setSetting("allow_experimental_hash_functions", 1); + query_context->setSetting("allow_experimental_object_type", 1); + query_context->setSetting("allow_experimental_annoy_index", 1); + query_context->setSetting("allow_experimental_usearch_index", 1); + query_context->setSetting("allow_experimental_bigint_types", 1); + query_context->setSetting("allow_experimental_window_functions", 1); + query_context->setSetting("allow_experimental_geo_types", 1); + query_context->setSetting("allow_experimental_map_type", 1); + + query_context->setSetting("allow_suspicious_low_cardinality_types", 1); + query_context->setSetting("allow_suspicious_fixed_string_types", 1); + query_context->setSetting("allow_suspicious_indices", 1); + query_context->setSetting("allow_suspicious_codecs", 1); + query_context->setSetting("allow_hyperscan", 1); + query_context->setSetting("allow_simdjson", 1); + query_context->setSetting("allow_deprecated_syntax_for_merge_tree", 1); + auto txn = std::make_shared(current_zookeeper, zookeeper_path, false, ""); query_context->initZooKeeperMetadataTransaction(txn); return query_context; @@ -1155,6 +1182,7 @@ void DatabaseReplicated::stopReplication() void DatabaseReplicated::shutdown() { stopReplication(); + ddl_worker_initialized = false; ddl_worker = nullptr; DatabaseAtomic::shutdown(); } @@ -1299,7 +1327,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const /// It may update the metadata digest (both locally and in ZooKeeper) /// before DatabaseReplicatedDDLWorker::initializeReplication() has finished. /// We should not update metadata until the database is initialized. 
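The comment above is the motivation for the new ddl_worker_initialized flag: the worker pointer alone can be observed before initializeReplication() has finished. A minimal sketch of that publish-last / clear-first pattern, with a trivial Worker stand-in:

#include <atomic>
#include <memory>

struct Worker { bool isCurrentlyActive() const { return true; } };

struct Database
{
    std::unique_ptr<Worker> ddl_worker;
    std::atomic_bool ddl_worker_initialized{false};

    void startup()
    {
        ddl_worker = std::make_unique<Worker>();
        // ... replication / worker initialization happens here ...
        ddl_worker_initialized = true;            // published only after startup completed
    }

    bool canExecuteMetadataAlter() const
    {
        // Checking only `ddl_worker != nullptr` could observe a half-initialized worker.
        return ddl_worker_initialized && ddl_worker->isCurrentlyActive();
    }

    void shutdown()
    {
        ddl_worker_initialized = false;           // cleared before the worker is destroyed
        ddl_worker = nullptr;
    }
};

int main()
{
    Database db;
    db.startup();
    return db.canExecuteMetadataAlter() ? 0 : 1;
}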
- return ddl_worker && ddl_worker->isCurrentlyActive(); + return ddl_worker_initialized && ddl_worker->isCurrentlyActive(); } void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name) @@ -1472,7 +1500,7 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context, /// Some ALTERs are not replicated on database level if (const auto * alter = query_ptr->as()) { - if (alter->isAttachAlter() || alter->isFetchAlter() || alter->isDropPartitionAlter() || is_keeper_map_table(query_ptr)) + if (alter->isAttachAlter() || alter->isFetchAlter() || alter->isDropPartitionAlter() || is_keeper_map_table(query_ptr) || alter->isFreezeAlter()) return false; if (has_many_shards() || !is_replicated_table(query_ptr)) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 8e33f482ac1..7ba91e48085 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -134,6 +134,7 @@ private: std::atomic_bool is_readonly = true; std::atomic_bool is_probably_dropped = false; std::atomic_bool is_recovering = false; + std::atomic_bool ddl_worker_initialized = false; std::unique_ptr ddl_worker; UInt32 max_log_ptr_at_creation = 0; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 4976f54e417..0ffedeb58f1 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -208,7 +208,7 @@ String DatabaseReplicatedDDLWorker::enqueueQueryImpl(const ZooKeeperPtr & zookee zkutil::KeeperMultiException::check(code, ops, res); } - if (iters == 0) + if (counter_path.empty()) throw Exception(ErrorCodes::UNFINISHED, "Cannot enqueue query, because some replica are trying to enqueue another query. " "It may happen on high queries rate or, in rare cases, after connection loss. Client should retry."); @@ -356,7 +356,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na /// We use tryRemove(...) because multiple hosts (including initiator) may try to do it concurrently. 
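The tryRemove comment above relies on a common pattern: when several replicas race to delete the same node, "already removed" counts as success and only other error codes are escalated. A sketch with placeholder error codes standing in for ZooKeeper's ZOK/ZNONODE:

#include <stdexcept>
#include <string>

enum class Error { Ok, NoNode, Other };

// Hypothetical stand-in for ZooKeeper's tryRemove().
static Error tryRemove(const std::string & /*path*/) { return Error::NoNode; }

static void removeIfExists(const std::string & path)
{
    const Error code = tryRemove(path);
    if (code != Error::Ok && code != Error::NoNode)   // losing the race is fine
        throw std::runtime_error("Cannot remove " + path);
}

int main()
{
    removeIfExists("/clickhouse/ddl/query-0000000001/try");   // illustrative path
}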
auto code = zookeeper->tryRemove(try_node_path); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw Coordination::Exception(code, try_node_path); + throw Coordination::Exception::fromPath(code, try_node_path); if (!zookeeper->exists(fs::path(entry_path) / "committed")) { diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index bb98e2bd3bb..4ba793d858d 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -292,7 +292,7 @@ void DatabaseWithOwnTablesBase::shutdown() for (const auto & kv : tables_snapshot) { - kv.second->flush(); + kv.second->flushAndPrepareForShutdown(); } for (const auto & kv : tables_snapshot) diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp index 9e33548b0dd..09640d2f86e 100644 --- a/src/Databases/IDatabase.cpp +++ b/src/Databases/IDatabase.cpp @@ -1,7 +1,10 @@ +#include #include #include #include #include +#include +#include namespace DB @@ -18,7 +21,12 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const { if (auto storage = tryGetTable(name, context)) return storage; - throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); + TableNameHints hints(this->shared_from_this(), context); + std::vector names = hints.getHints(name); + if (names.empty()) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); + else + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist. Maybe you meant {}?", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name), backQuoteIfNeed(names[0])); } std::vector> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 9bed3c4bfc5..01d940b0429 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -372,6 +372,7 @@ protected: }; using DatabasePtr = std::shared_ptr; +using ConstDatabasePtr = std::shared_ptr; using Databases = std::map; } diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 653c2dc27b6..f7e669d9feb 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -65,6 +65,7 @@ void DatabaseMaterializedMySQL::setException(const std::exception_ptr & exceptio void DatabaseMaterializedMySQL::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) { + LOG_TRACE(log, "Starting MaterializeMySQL tables"); DatabaseAtomic::startupTables(thread_pool, mode); if (mode < LoadingStrictnessLevel::FORCE_ATTACH) @@ -122,6 +123,7 @@ void DatabaseMaterializedMySQL::alterTable(ContextPtr context_, const StorageID void DatabaseMaterializedMySQL::drop(ContextPtr context_) { + LOG_TRACE(log, "Dropping MaterializeMySQL database"); /// Remove metadata info fs::path metadata(getMetadataPath() + "/.metadata"); diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 3698abf5542..60a88ea0d67 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 94e5ba1773e..4ea617dd587 100644 --- 
a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -77,6 +77,8 @@ DatabaseMySQL::DatabaseMySQL( throw; } + fs::create_directories(metadata_path); + thread = ThreadFromGlobalPool{&DatabaseMySQL::cleanOutdatedTables, this}; } @@ -144,6 +146,7 @@ ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, Context auto table_storage_define = database_engine_define->clone(); { ASTStorage * ast_storage = table_storage_define->as(); + ast_storage->engine->kind = ASTFunction::Kind::TABLE_ENGINE; ASTs storage_children = ast_storage->children; auto storage_engine_arguments = ast_storage->engine->arguments; diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 603bf3d0166..3578362b8dd 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -1,9 +1,11 @@ +#include "Common/logger_useful.h" #include "config.h" #if USE_MYSQL #include #include +#include #include #include #include @@ -139,7 +141,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S { bool first = true; WriteBufferFromOwnString error_message; - error_message << "Illegal MySQL variables, the MaterializedMySQL engine requires "; for (const auto & [variable_name, variable_error_val] : variables_error_message) { error_message << (first ? "" : ", ") << variable_name << "='" << variable_error_val << "'"; @@ -148,7 +149,8 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S first = false; } - throw Exception::createDeprecated(error_message.str(), ErrorCodes::ILLEGAL_MYSQL_VARIABLE); + throw Exception(ErrorCodes::ILLEGAL_MYSQL_VARIABLE, "Illegal MySQL variables, the MaterializedMySQL engine requires {}", + error_message.str()); } } @@ -342,9 +344,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, { std::make_shared(), "column_type" } }; - const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" - " WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) + - "' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION"; + String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" + " WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION"; StreamSettings mysql_input_stream_settings(global_settings, false, true); auto mysql_source = std::make_unique(connection, query, tables_columns_sample_block, mysql_input_stream_settings); @@ -499,7 +500,10 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { throw; } - catch (const mysqlxx::ConnectionFailed &) {} + catch (const mysqlxx::ConnectionFailed & ex) + { + LOG_TRACE(log, "Connection to MySQL failed {}", ex.displayText()); + } catch (const mysqlxx::BadQuery & e) { // Lost connection to MySQL server during query @@ -812,6 +816,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even CurrentThread::QueryScope query_scope(query_context); String query = query_event.query; + tryQuoteUnrecognizedTokens(query, query); if (!materialized_tables_list.empty()) { auto table_id = tryParseTableIDFromDDL(query, query_event.schema); diff --git a/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp new file mode 
100644 index 00000000000..9c76deb2712 --- /dev/null +++ b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp @@ -0,0 +1,289 @@ +#include + +#include + +using namespace DB; + +struct TestCase +{ + String query; + String res; + bool ok; + + TestCase( + const String & query_, + const String & res_, + bool ok_) + : query(query_) + , res(res_) + , ok(ok_) + { + } +}; + +std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case) +{ + return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok; +} + +class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam +{ +}; + +TEST_P(QuoteUnrecognizedTokensTest, escape) +{ + const auto & [query, expected, ok] = GetParam(); + String actual; + bool res = tryQuoteUnrecognizedTokens(query, actual); + EXPECT_EQ(ok, res); + EXPECT_EQ(expected, actual); +} + +INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list{ + { + "", + "", + false + }, + { + "test '\"`", + "", + false + }, + { + "SELECT * FROM db.`table`", + "", + false + }, + { + "道渠", + "`道渠`", + true + }, + { + "道", + "`道`", + true + }, + { + "道道(skip) 道(", + "`道道`(skip) `道`(", + true + }, + { + "`道渠`", + "", + false + }, + { + "'道'", + "", + false + }, + { + "\"道\"", + "", + false + }, + { + "` 道 test 渠 `", + "", + false + }, + { + "skip 道 skip 123", + "skip `道` skip 123", + true + }, + { + "skip 123 `道` skip", + "", + false + }, + { + "skip `道 skip 123", + "", + false + }, + { + "skip test道 skip", + "skip `test道` skip", + true + }, + { + "test道2test", + "`test道2test`", + true + }, + { + "skip test道2test 123", + "skip `test道2test` 123", + true + }, + { + "skip 您a您a您a a您a您a您a 1您2您3您4 skip", + "skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip", + true + }, + { + "skip 您a 您a您a b您2您c您4 skip", + "skip `您a` `您a您a` `b您2您c您4` skip", + true + }, + { + "123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip", + "`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip", + true + }, + { + "_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip", + "`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip", + true + }, + { + "TABLE 您2 您(", + "TABLE `您2` `您`(", + true + }, + { + "TABLE 您.a您2(日2日2 INT", + "TABLE `您`.`a您2`(`日2日2` INT", + true + }, + { + "TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)", + "TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)", + true + }, + { + "TABLE 您a日.您a您a您a(test INT", + "TABLE `您a日`.`您a您a您a`(test INT", + true + }, + { + "TABLE 您a日.您a您a您a(Hi您Hi好Hi INT", + "TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT", + true + }, + { + "--TABLE 您a日.您a您a您a(test INT", + "", + false + }, + { + "--您a日.您a您a您a(\n您Hi好", + "--您a日.您a您a您a(\n`您Hi好`", + true + }, + { + " /* TABLE 您a日.您a您a您a(test INT", + "", + false + }, + { + "/*您a日.您a您a您a(*/\n您Hi好", + "/*您a日.您a您a您a(*/\n`您Hi好`", + true + }, + { + " 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a", + " `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`", + true + }, + //{ TODO + // "TABLE 您2.您a您a您a(test INT", + // "TABLE `您2`.`您a您a您a`(test INT", + // true + //}, + { + "skip 您a您a您a skip", + "skip `您a您a您a` skip", + true + }, + { + "test 您a2您3a您a 4 again", + "test `您a2您3a您a` 4 again", + true + }, + { + "CREATE TABLE db.`道渠`", + "", + false + }, + { + "CREATE TABLE db.`道渠", + "", + false + }, + { + "CREATE TABLE db.道渠", + "CREATE TABLE db.`道渠`", + true + }, + { + "CREATE TABLE db. 道渠", + "CREATE TABLE db. 
`道渠`", + true + }, + { + R"sql( + CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL, + 您 INT, + 道渠 DATETIME, + 您test INT, test您 INT, test您test INT, + 道渠test INT, test道渠 INT, test道渠test INT, + 您_ INT, _您 INT, _您_ INT, + 您您__ INT, __您您 INT, __您您__ INT, + 您2 INT, 2您 INT, 2您2 INT, + 您您22 INT, 22您您 INT, 22您您22 INT, + 您_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT, + 您您__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT, + 您2_ INT, 2_您 INT, 2_您2_ INT, + 您您22__ INT, 22__您您 INT, 22__您您22__ INT, + 您_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT, + 您您_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT, + 您test3 INT, test3您 INT, test3您test3 INT, test3您3test INT, + 您您test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test INT, + 您3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT, + 您您3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT, + 您_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT, + 您您_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT, + 您_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT, + test_日期 varchar(256), test_道_2 varchar(256) NOT NULL , + test_道渠您_3 + BIGINT NOT NULL, + 道您3_test INT, + PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312; + )sql", + R"sql( + CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL, + `您` INT, + `道渠` DATETIME, + `您test` INT, `test您` INT, `test您test` INT, + `道渠test` INT, `test道渠` INT, `test道渠test` INT, + `您_` INT, `_您` INT, `_您_` INT, + `您您__` INT, `__您您` INT, `__您您__` INT, + `您2` INT, `2您` INT, `2您2` INT, + `您您22` INT, `22您您` INT, `22您您22` INT, + `您_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT, + `您您__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT, + `您2_` INT, `2_您` INT, `2_您2_` INT, + `您您22__` INT, `22__您您` INT, `22__您您22__` INT, + `您_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT, + `您您_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT, + `您test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT, + `您您test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test` INT, + `您3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT, + `您您3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT, + `您_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT, + `您您_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT, + `您_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT, + `test_日期` varchar(256), `test_道_2` varchar(256) NOT NULL , + `test_道渠您_3` + BIGINT NOT NULL, + `道您3_test` INT, + PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312; + )sql", + true + }, +})); diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp new file mode 100644 index 00000000000..cd4603ddaec --- /dev/null +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp @@ -0,0 +1,96 @@ +#include +#include +#include + +namespace DB +{ + +/// Checks if there are no any tokens (like whitespaces) between current and previous pos +static bool noWhitespaces(const char * to, const char * from) +{ + return static_cast(from - to) == 0; +} + +/// Checks if the token should be 
quoted too together with unrecognized +static bool isWordOrNumber(TokenType type) +{ + return type == TokenType::BareWord || type == TokenType::Number; +} + +static void quoteLiteral( + IParser::Pos & pos, + IParser::Pos & pos_prev, + const char *& pos_unrecognized, + const char *& copy_from, + String & rewritten_query) +{ + /// Copy also whitespaces if any + const auto * end = + isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin) + ? pos->end + : pos_prev->end; + String literal(pos_unrecognized, static_cast(end - pos_unrecognized)); + rewritten_query.append(copy_from, pos_unrecognized - copy_from).append(backQuoteMySQL(literal)); + copy_from = end; +} + +bool tryQuoteUnrecognizedTokens(const String & query, String & res) +{ + Tokens tokens(query.data(), query.data() + query.size()); + IParser::Pos pos(tokens, 0); + Expected expected; + String rewritten_query; + const char * copy_from = query.data(); + auto pos_prev = pos; + const char * pos_unrecognized = nullptr; + for (;pos->type != TokenType::EndOfStream; ++pos) + { + /// Commit quotes if any whitespaces found or the token is not a word + bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type)); + if (pos_unrecognized && commit) + { + quoteLiteral( + pos, + pos_prev, + pos_unrecognized, + copy_from, + rewritten_query); + pos_unrecognized = nullptr; + } + if (pos->type == TokenType::Error) + { + /// Find first appearance of the error token + if (!pos_unrecognized) + { + pos_unrecognized = + isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin) + ? pos_prev->begin + : pos->begin; + } + } + pos_prev = pos; + } + + /// There was EndOfStream but not committed unrecognized token + if (pos_unrecognized) + { + quoteLiteral( + pos, + pos_prev, + pos_unrecognized, + copy_from, + rewritten_query); + pos_unrecognized = nullptr; + } + + /// If no Errors found + if (copy_from == query.data()) + return false; + + auto size = static_cast(pos->end - copy_from); + rewritten_query.append(copy_from, size); + res = rewritten_query; + return true; +} + +} diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h new file mode 100644 index 00000000000..582a297c485 --- /dev/null +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool tryQuoteUnrecognizedTokens(const String & query, String & res); + +} diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 04807bb3daf..f2b970a39af 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -35,6 +35,7 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; + extern const int CANNOT_GET_CREATE_TABLE_QUERY; } DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( @@ -221,10 +222,25 @@ ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & ta std::lock_guard lock(handler_mutex); - /// FIXME TSA - auto storage = std::make_shared(StorageID(TSA_SUPPRESS_WARNING_FOR_READ(database_name), table_name), getContext(), remote_database_name, table_name); - auto ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name); - assert_cast(ast_storage.get())->uuid = UUIDHelpers::generateV4(); + ASTPtr ast_storage; + 
try + { + auto storage = std::make_shared(StorageID(TSA_SUPPRESS_WARNING_FOR_READ(database_name), table_name), getContext(), remote_database_name, table_name); + ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name); + assert_cast(ast_storage.get())->uuid = UUIDHelpers::generateV4(); + } + catch (...) + { + if (throw_on_error) + { + throw Exception(ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY, + "Received error while fetching table structure for table {} from PostgreSQL: {}", + backQuote(table_name), getCurrentExceptionMessage(true)); + } + + tryLogCurrentException(__PRETTY_FUNCTION__); + } + return ast_storage; } diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 812a0d8717e..e90dcfcd8ad 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -54,6 +54,7 @@ DatabasePostgreSQL::DatabasePostgreSQL( , cache_tables(cache_tables_) , log(&Poco::Logger::get("DatabasePostgreSQL(" + dbname_ + ")")) { + fs::create_directories(metadata_path); cleaner_task = getContext()->getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); }); cleaner_task->deactivate(); } @@ -390,6 +391,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co auto create_table_query = std::make_shared(); auto table_storage_define = database_engine_define->clone(); + table_storage_define->as()->engine->kind = ASTFunction::Kind::TABLE_ENGINE; create_table_query->set(create_table_query->storage, table_storage_define); auto columns_declare_list = std::make_shared(); diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index 1cba9d1dc26..d031fd8e420 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -187,6 +187,7 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex } auto table_storage_define = database_engine_define->clone(); ASTStorage * ast_storage = table_storage_define->as(); + ast_storage->engine->kind = ASTFunction::Kind::TABLE_ENGINE; auto storage_engine_arguments = ast_storage->engine->arguments; auto table_id = storage->getStorageID(); /// Add table_name to engine arguments diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index c9dd554a6f1..90d2fedceac 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -16,10 +16,20 @@ if (OMIT_HEAVY_DEBUG_SYMBOLS) PROPERTIES COMPILE_FLAGS -g0) endif() -list(REMOVE_ITEM clickhouse_dictionaries_sources DictionaryFactory.cpp DictionarySourceFactory.cpp DictionaryStructure.cpp getDictionaryConfigurationFromAST.cpp) -list(REMOVE_ITEM clickhouse_dictionaries_headers DictionaryFactory.h DictionarySourceFactory.h DictionaryStructure.h getDictionaryConfigurationFromAST.h) +extract_into_parent_list(clickhouse_dictionaries_sources dbms_sources + DictionaryFactory.cpp + DictionarySourceFactory.cpp + DictionaryStructure.cpp + getDictionaryConfigurationFromAST.cpp +) +extract_into_parent_list(clickhouse_dictionaries_headers dbms_headers + DictionaryFactory.h + DictionarySourceFactory.h + DictionaryStructure.h + getDictionaryConfigurationFromAST.h +) -add_library(clickhouse_dictionaries ${clickhouse_dictionaries_sources}) +add_library(clickhouse_dictionaries ${clickhouse_dictionaries_headers} ${clickhouse_dictionaries_sources}) target_link_libraries(clickhouse_dictionaries PRIVATE diff --git 
a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp index d091e49d1f0..c3102632167 100644 --- a/src/Dictionaries/DictionaryFactory.cpp +++ b/src/Dictionaries/DictionaryFactory.cpp @@ -17,13 +17,13 @@ namespace ErrorCodes extern const int UNKNOWN_ELEMENT_IN_CONFIG; } -void DictionaryFactory::registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex) +void DictionaryFactory::registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex, bool has_layout_complex) { auto it = registered_layouts.find(layout_type); if (it != registered_layouts.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryFactory: the layout name '{}' is not unique", layout_type); - RegisteredLayout layout { .layout_create_function = create_layout, .is_layout_complex = is_layout_complex }; + RegisteredLayout layout { .layout_create_function = create_layout, .is_layout_complex = is_layout_complex, .has_layout_complex = has_layout_complex }; registered_layouts.emplace(layout_type, std::move(layout)); } @@ -89,6 +89,25 @@ bool DictionaryFactory::isComplex(const std::string & layout_type) const return it->second.is_layout_complex; } +bool DictionaryFactory::convertToComplex(std::string & layout_type) const +{ + auto it = registered_layouts.find(layout_type); + + if (it == registered_layouts.end()) + { + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, + "Unknown dictionary layout type: {}", + layout_type); + } + + if (!it->second.is_layout_complex && it->second.has_layout_complex) + { + layout_type = "complex_key_" + layout_type; + return true; + } + return false; +} + DictionaryFactory & DictionaryFactory::instance() { diff --git a/src/Dictionaries/DictionaryFactory.h b/src/Dictionaries/DictionaryFactory.h index b1dad340f4b..35097a5ed24 100644 --- a/src/Dictionaries/DictionaryFactory.h +++ b/src/Dictionaries/DictionaryFactory.h @@ -55,13 +55,18 @@ public: bool isComplex(const std::string & layout_type) const; - void registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex); + /// If the argument `layout_type` is not a complex layout and has a corresponding complex layout, + /// change `layout_type` to the corresponding complex layout and return true; otherwise do nothing and return false.
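For illustration only (not part of the patch): a minimal C++ sketch of how the new convertToComplex helper is meant to be used; the "hashed" layout is just an example, assuming it was registered with has_layout_complex = true.

// Sketch: upgrade a simple layout name to its complex_key_ counterpart
// when the dictionary's primary key cannot be served by the simple variant.
std::string layout_type = "hashed";
if (DictionaryFactory::instance().convertToComplex(layout_type))
{
    // layout_type is now "complex_key_hashed"; configuration building can
    // continue as if the complex layout had been requested explicitly.
}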
+ bool convertToComplex(std::string & layout_type) const; + + void registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex, bool has_layout_complex = true); private: struct RegisteredLayout { LayoutCreateFunction layout_create_function; bool is_layout_complex; + bool has_layout_complex; }; using LayoutRegistry = std::unordered_map; diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index f726a8a2a46..55060b1592f 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -34,7 +34,9 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying map_item(Decimal32), map_item(Decimal64), map_item(Decimal128), map_item(Decimal256), map_item(DateTime64), - map_item(UUID), map_item(String), map_item(Array) + map_item(UUID), map_item(String), map_item(Array), + + map_item(IPv4), map_item(IPv6) }; #undef map_item diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index d84967fbae6..36a0642abce 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -288,7 +288,8 @@ public: : ISource(pipeline_.getHeader()) , pipeline(std::move(pipeline_)) , executor(pipeline) - {} + { + } std::string getName() const override { diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index e40ef07de9e..f1acd610274 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -114,10 +114,7 @@ QueryPipeline ExecutableDictionarySource::loadAll() auto command = configuration.command; updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context); - ShellCommandSourceConfiguration command_configuration { - .check_exit_code = true, - }; - return QueryPipeline(coordinator->createPipe(command, configuration.command_arguments, {}, sample_block, context, command_configuration)); + return QueryPipeline(coordinator->createPipe(command, configuration.command_arguments, {}, sample_block, context)); } QueryPipeline ExecutableDictionarySource::loadUpdatedAll() @@ -152,10 +149,7 @@ QueryPipeline ExecutableDictionarySource::loadUpdatedAll() LOG_TRACE(log, "loadUpdatedAll {}", command); - ShellCommandSourceConfiguration command_configuration { - .check_exit_code = true, - }; - return QueryPipeline(coordinator->createPipe(command, command_arguments, {}, sample_block, context, command_configuration)); + return QueryPipeline(coordinator->createPipe(command, command_arguments, {}, sample_block, context)); } QueryPipeline ExecutableDictionarySource::loadIds(const std::vector & ids) @@ -186,11 +180,7 @@ QueryPipeline ExecutableDictionarySource::getStreamForBlock(const Block & block) Pipes shell_input_pipes; shell_input_pipes.emplace_back(std::move(shell_input_pipe)); - ShellCommandSourceConfiguration command_configuration { - .check_exit_code = true, - }; - - auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context, command_configuration); + auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context); if (configuration.implicit_key) pipe.addTransform(std::make_shared(block, pipe.getHeader())); @@ -275,6 +265,8 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) .command_termination_timeout_seconds = 
config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10), .command_read_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_read_timeout", 10000), .command_write_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_write_timeout", 10000), + .stderr_reaction = parseExternalCommandStderrReaction(config.getString(settings_config_prefix + ".stderr_reaction", "none")), + .check_exit_code = config.getBool(settings_config_prefix + ".check_exit_code", true), .is_executable_pool = false, .send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false), .execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false) diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 94685060a46..d28c73c9c52 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -132,7 +132,6 @@ QueryPipeline ExecutablePoolDictionarySource::getStreamForBlock(const Block & bl ShellCommandSourceConfiguration command_configuration; command_configuration.read_fixed_number_of_rows = true; command_configuration.number_of_rows_to_read = block.rows(); - command_configuration.check_exit_code = true; Pipes shell_input_pipes; shell_input_pipes.emplace_back(std::move(shell_input_pipe)); @@ -233,6 +232,8 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory) .command_termination_timeout_seconds = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10), .command_read_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_read_timeout", 10000), .command_write_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_write_timeout", 10000), + .stderr_reaction = parseExternalCommandStderrReaction(config.getString(settings_config_prefix + ".stderr_reaction", "none")), + .check_exit_code = config.getBool(settings_config_prefix + ".check_exit_code", true), .pool_size = config.getUInt64(settings_config_prefix + ".pool_size", 16), .max_command_execution_time_seconds = max_command_execution_time, .is_executable_pool = true, diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index d3699a150c4..b06137740da 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -395,11 +395,15 @@ void FlatDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + update_field_loaded_block.reset(); Block block; + while (executor.pull(block)) { + if (!block.rows()) + continue; + convertToFullIfSparse(block); /// We are using this to keep saved data if input stream consists of multiple blocks @@ -683,7 +687,7 @@ void registerDictionaryFlat(DictionaryFactory & factory) return std::make_unique(dict_id, dict_struct, std::move(source_ptr), configuration); }; - factory.registerLayout("flat", create_layout, false); + factory.registerLayout("flat", create_layout, false, false); } diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 880f68cea95..45525f1468b 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -409,11 +409,17 @@ void HashedArrayDictionary::updateData() if (!update_field_loaded_block || 
update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + update_field_loaded_block.reset(); Block block; + while (executor.pull(block)) { + if (!block.rows()) + continue; + + convertToFullIfSparse(block); + /// We are using this to keep saved data if input stream consists of multiple blocks if (!update_field_loaded_block) update_field_loaded_block = std::make_shared(block.cloneEmpty()); diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 5f25600db8f..d6ee6e369c4 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -25,6 +25,7 @@ #include #include + namespace CurrentMetrics { extern const Metric HashedDictionaryThreads; @@ -708,11 +709,15 @@ void HashedDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + update_field_loaded_block.reset(); Block block; + while (executor.pull(block)) { + if (!block.rows()) + continue; + convertToFullIfSparse(block); /// We are using this to keep saved data if input stream consists of multiple blocks diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 94b8b961577..0238ef0b2b9 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -13,6 +13,7 @@ #include + namespace DB { diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index e1c2168cea3..3f9bad941d5 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -919,11 +919,17 @@ void RangeHashedDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + update_field_loaded_block.reset(); Block block; + while (executor.pull(block)) { + if (!block.rows()) + continue; + + convertToFullIfSparse(block); + /// We are using this to keep saved data if input stream consists of multiple blocks if (!update_field_loaded_block) update_field_loaded_block = std::make_shared(block.cloneEmpty()); diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index a9846dc06e9..29ef71b3ce0 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -206,6 +206,8 @@ void RegExpTreeDictionary::initRegexNodes(Block & block) re2_st::RE2::Options regexp_options; regexp_options.set_log_errors(false); + regexp_options.set_case_sensitive(!flag_case_insensitive); + regexp_options.set_dot_nl(flag_dotall); RegexTreeNodePtr node = std::make_shared(id, parent_id, regex, regexp_options); int num_captures = std::min(node->searcher.NumberOfCapturingGroups() + 1, 10); @@ -228,7 +230,7 @@ void RegExpTreeDictionary::initRegexNodes(Block & block) else { Field field = parseStringToField(value, attr.type); - node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .original_value = value}; + node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .pieces = {}, .original_value = value}; } } } @@ -330,11 +332,20 @@ void RegExpTreeDictionary::loadData() std::vector flags; std::vector lengths; + // Notes: + // - Always set HS_FLAG_SINGLEMATCH because we only care about 
whether a pattern matches at least once + // - HS_FLAG_CASELESS is supported by hs_compile_lit_multi, so we should set it if flag_case_insensitive is set. + // - HS_FLAG_DOTALL is not supported by hs_compile_lit_multi, but the '.' wildcard can't appear in any of the simple regexps + // anyway, so even if flag_dotall is set, we only need to configure the RE2 searcher, and don't need to set any Hyperscan flags. + unsigned int flag_bits = HS_FLAG_SINGLEMATCH; + if (flag_case_insensitive) + flag_bits |= HS_FLAG_CASELESS; + for (const std::string & simple_regexp : simple_regexps) { patterns.push_back(simple_regexp.data()); lengths.push_back(simple_regexp.size()); - flags.push_back(HS_FLAG_SINGLEMATCH); + flags.push_back(flag_bits); } hs_database_t * db = nullptr; @@ -346,7 +357,7 @@ void RegExpTreeDictionary::loadData() ids[i] = static_cast(i+1); hs_error_t err = hs_compile_lit_multi(patterns.data(), flags.data(), ids.get(), lengths.data(), static_cast(patterns.size()), HS_MODE_BLOCK, nullptr, &db, &compile_error); - origin_db = (db); + origin_db.reset(db); if (err != HS_SUCCESS) { /// CompilerError is a unique_ptr, so correct memory free after the exception is thrown. @@ -380,12 +391,16 @@ RegExpTreeDictionary::RegExpTreeDictionary( const DictionaryStructure & structure_, DictionarySourcePtr source_ptr_, Configuration configuration_, - bool use_vectorscan_) + bool use_vectorscan_, + bool flag_case_insensitive_, + bool flag_dotall_) : IDictionary(id_), structure(structure_), source_ptr(source_ptr_), configuration(configuration_), use_vectorscan(use_vectorscan_), + flag_case_insensitive(flag_case_insensitive_), + flag_dotall(flag_dotall_), logger(&Poco::Logger::get("RegExpTreeDictionary")) { if (auto * ch_source = typeid_cast(source_ptr.get())) @@ -658,7 +673,7 @@ std::unordered_map RegExpTreeDictionary::match( }; hs_error_t err = hs_scan( - origin_db, + origin_db.get(), reinterpret_cast(keys_data.data()) + offset, static_cast(length), 0, @@ -859,7 +874,14 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory) auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); - return std::make_unique(dict_id, dict_struct, std::move(source_ptr), configuration, context->getSettings().regexp_dict_allow_hyperscan); + return std::make_unique( + dict_id, + dict_struct, + std::move(source_ptr), + configuration, + context->getSettings().regexp_dict_allow_hyperscan, + context->getSettings().regexp_dict_flag_case_insensitive, + context->getSettings().regexp_dict_flag_dotall); }; factory.registerLayout("regexp_tree", create_layout, true); diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h index 30966184eb6..62008bb5aae 100644 --- a/src/Dictionaries/RegExpTreeDictionary.h +++ b/src/Dictionaries/RegExpTreeDictionary.h @@ -49,7 +49,9 @@ public: const DictionaryStructure & structure_, DictionarySourcePtr source_ptr_, Configuration configuration_, - bool use_vectorscan_); + bool use_vectorscan_, + bool flag_case_insensitive_, + bool flag_dotall_); std::string getTypeName() const override { return name; } @@ -85,7 +87,8 @@ public: std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), structure, source_ptr->clone(), configuration, use_vectorscan); + return std::make_shared( + getDictionaryID(), structure, source_ptr->clone(), configuration, use_vectorscan, flag_case_insensitive, flag_dotall); } ColumnUInt8::Ptr hasKeys(const Columns &, const DataTypes &) const override @@ -189,6 +192,8 @@ 
private: using RegexTreeNodePtr = std::shared_ptr; bool use_vectorscan; + bool flag_case_insensitive; + bool flag_dotall; std::vector simple_regexps; std::vector regexp_ids; @@ -199,7 +204,7 @@ private: #if USE_VECTORSCAN MultiRegexps::DeferredConstructedRegexpsPtr hyperscan_regex; MultiRegexps::ScratchPtr origin_scratch; - hs_database_t* origin_db; + MultiRegexps::DataBasePtr origin_db; #endif Poco::Logger * logger; diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 0b7352e9cbb..98f115b2ed8 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB @@ -322,7 +323,7 @@ void buildSingleAttribute( /** Transforms - * PRIMARY KEY Attr1 ,..., AttrN + * PRIMARY KEY Attr1, ..., AttrN * to the next configuration * Attr1 * or @@ -614,6 +615,16 @@ getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr conte checkPrimaryKey(all_attr_names_and_types, pk_attrs); + /// If the pk size is 1 and pk's DataType is not number, we should convert to complex. + /// NOTE: the data type of Numeric key(simple layout) is UInt64, so if the type is not under UInt64, type casting will lead to precision loss. + DataTypePtr first_key_type = DataTypeFactory::instance().get(all_attr_names_and_types.find(pk_attrs[0])->second.type); + if ((pk_attrs.size() > 1 || (pk_attrs.size() == 1 && !isNumber(first_key_type))) + && !complex + && DictionaryFactory::instance().convertToComplex(dictionary_layout->layout_type)) + { + complex = true; + } + buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_attrs, query.dictionary_attributes_list); buildLayoutConfiguration(xml_document, current_dictionary, query.dictionary->dict_settings, dictionary_layout); diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 441e639b967..ca7cbf443f2 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -324,7 +324,7 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes) } -void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) +void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir, const WriteSettings & settings) { /// Check if we can copy the file without deciphering. if (isSameDiskType(*this, *to_disk)) @@ -340,14 +340,14 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha auto wrapped_from_path = wrappedPath(from_dir); auto to_delegate = to_disk_enc->delegate; auto wrapped_to_path = to_disk_enc->wrappedPath(to_dir); - delegate->copyDirectoryContent(wrapped_from_path, to_delegate, wrapped_to_path); + delegate->copyDirectoryContent(wrapped_from_path, to_delegate, wrapped_to_path, settings); return; } } } /// Copy the file through buffers with deciphering. - IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); + IDisk::copyDirectoryContent(from_dir, to_disk, to_dir, settings); } std::unique_ptr DiskEncrypted::readFile( @@ -433,10 +433,10 @@ void DiskEncrypted::applyNewSettings( { auto new_settings = parseDiskEncryptedSettings(name, config, config_prefix, disk_map); if (new_settings->wrapped_disk != delegate) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging wrapped disk on the fly is not supported. 
Disk {}", name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing wrapped disk on the fly is not supported. Disk {}", name); if (new_settings->disk_path != disk_path) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing disk path on the fly is not supported. Disk {}", name); current_settings.set(std::move(new_settings)); IDisk::applyNewSettings(config, context, config_prefix, disk_map); diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index f7db4f398c5..2252e4f43f5 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -112,7 +112,7 @@ public: delegate->listFiles(wrapped_path, file_names); } - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; + void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir, const WriteSettings & settings) override; std::unique_ptr readFile( const String & path, diff --git a/src/Disks/DiskEncryptedTransaction.cpp b/src/Disks/DiskEncryptedTransaction.cpp index 40df94b309a..3fd2085f9cc 100644 --- a/src/Disks/DiskEncryptedTransaction.cpp +++ b/src/Disks/DiskEncryptedTransaction.cpp @@ -53,11 +53,11 @@ String DiskEncryptedSettings::findKeyByFingerprint(UInt128 key_fingerprint, cons return it->second; } -void DiskEncryptedTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path) +void DiskEncryptedTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) { auto wrapped_from_path = wrappedPath(from_file_path); auto wrapped_to_path = wrappedPath(to_file_path); - delegate_transaction->copyFile(wrapped_from_path, wrapped_to_path); + delegate_transaction->copyFile(wrapped_from_path, wrapped_to_path, settings); } std::unique_ptr DiskEncryptedTransaction::writeFile( // NOLINT diff --git a/src/Disks/DiskEncryptedTransaction.h b/src/Disks/DiskEncryptedTransaction.h index 04cc63f1671..70ed1f469ef 100644 --- a/src/Disks/DiskEncryptedTransaction.h +++ b/src/Disks/DiskEncryptedTransaction.h @@ -116,7 +116,7 @@ public: /// but it's impossible to implement correctly in transactions because other disk can /// use different metadata storage. /// TODO: maybe remove it at all, we don't want copies - void copyFile(const std::string & from_file_path, const std::string & to_file_path) override; + void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) override; /// Open the file for write and return WriteBufferFromFileBase object. std::unique_ptr writeFile( /// NOLINT diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index d020145b2c3..aaa22655f7b 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -432,12 +432,13 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another) return typeid(one) == typeid(another); } -void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) +void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir, const WriteSettings & settings) { - if (isSameDiskType(*this, *to_disk)) + /// If throttling was configured we cannot use copying directly. 
+ if (isSameDiskType(*this, *to_disk) && !settings.local_throttler) fs::copy(fs::path(disk_path) / from_dir, fs::path(to_disk->getPath()) / to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else - IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); + IDisk::copyDirectoryContent(from_dir, to_disk, to_dir, settings); } SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 97118e5e18c..197f6bb9367 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -65,7 +65,7 @@ public: void replaceFile(const String & from_path, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; + void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir, const WriteSettings & settings) override; void listFiles(const String & path, std::vector & file_names) const override; diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h index 2cf540444be..440ee6271e9 100644 --- a/src/Disks/FakeDiskTransaction.h +++ b/src/Disks/FakeDiskTransaction.h @@ -54,9 +54,9 @@ public: disk.replaceFile(from_path, to_path); } - void copyFile(const std::string & from_file_path, const std::string & to_file_path) override + void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) override { - disk.copyFile(from_file_path, disk, to_file_path); + disk.copyFile(from_file_path, disk, to_file_path, settings); } std::unique_ptr writeFile( /// NOLINT diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 544ba014fde..5b9f1208622 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -122,11 +123,10 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p } } -void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path, bool copy_root_dir) +void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path, bool copy_root_dir, WriteSettings settings) { ResultsCollector results; - WriteSettings settings; /// Disable parallel write. We already copy in parallel. /// Avoid high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage settings.s3_allow_parallel_part_upload = false; @@ -140,12 +140,12 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_dir) +void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir, const WriteSettings & settings) { if (!to_disk->exists(to_dir)) to_disk->createDirectories(to_dir); - copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); + copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir= */ false, settings); } void IDisk::truncateFile(const String &, size_t) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 2b0ca369a96..fc4eaec428c 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -193,7 +193,7 @@ public: virtual void replaceFile(const String & from_path, const String & to_path) = 0; /// Recursively copy files from from_dir to to_dir. Create to_dir if not exists. 
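For illustration only (not part of the patch): a minimal C++ sketch of a caller under the extended copyDirectoryContent signature; from_disk, to_disk and the path are hypothetical.

// Sketch: write settings are now threaded through directory copies,
// so a configured throttler also limits disk-to-disk copy speed.
WriteSettings settings;             // e.g. obtained from the query or server context
// settings.local_throttler = ...; // if set, DiskLocal falls back to copyThroughBuffers()
from_disk->copyDirectoryContent("store/data/", to_disk, "store/data/", settings);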
- virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir); + virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir, const WriteSettings & settings); /// Copy file `from_file_path` to `to_file_path` located at `to_disk`. virtual void copyFile( /// NOLINT @@ -470,7 +470,7 @@ protected: /// Base implementation of the function copy(). /// It just opens two files, reads data by portions from the first file, and writes it to the second one. /// A derived class may override copy() to provide a faster implementation. - void copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path, bool copy_root_dir = true); + void copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path, bool copy_root_dir, WriteSettings settings); virtual void checkAccessImpl(const String & path); diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index 935cd6b2c65..9f18206a4ad 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -59,7 +59,7 @@ public: /// but it's impossible to implement correctly in transactions because other disk can /// use different metadata storage. /// TODO: maybe remove it at all, we don't want copies - virtual void copyFile(const std::string & from_file_path, const std::string & to_file_path) = 0; + virtual void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings = {}) = 0; /// Open the file for write and return WriteBufferFromFileBase object. virtual std::unique_ptr writeFile( /// NOLINT diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 86ee541dcbd..d52748b04bf 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -81,8 +81,7 @@ bool AsynchronousBoundedReadBuffer::hasPendingDataToRead() return true; } -std::future -AsynchronousBoundedReadBuffer::asyncReadInto(char * data, size_t size, Priority priority) +std::future AsynchronousBoundedReadBuffer::readAsync(char * data, size_t size, Priority priority) { IAsynchronousReader::Request request; request.descriptor = std::make_shared(*impl, async_read_counters); @@ -94,6 +93,17 @@ AsynchronousBoundedReadBuffer::asyncReadInto(char * data, size_t size, Priority return reader.submit(request); } +IAsynchronousReader::Result AsynchronousBoundedReadBuffer::readSync(char * data, size_t size) +{ + IAsynchronousReader::Request request; + request.descriptor = std::make_shared(*impl, async_read_counters); + request.buf = data; + request.size = size; + request.offset = file_offset_of_buffer_end; + request.ignore = bytes_to_ignore; + return reader.execute(request); +} + void AsynchronousBoundedReadBuffer::prefetch(Priority priority) { if (prefetch_future.valid()) @@ -106,7 +116,7 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority) last_prefetch_info.priority = priority; chassert(prefetch_buffer.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); - prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority); + prefetch_future = readAsync(prefetch_buffer.data(), prefetch_buffer.size(), priority); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } @@ -114,6 +124,26 @@ void AsynchronousBoundedReadBuffer::setReadUntilPosition(size_t position) { if 
(!read_until_position || position != *read_until_position) { + if (position < file_offset_of_buffer_end) + { + /// file has been read beyond new read until position already + if (working_buffer.size() >= file_offset_of_buffer_end - position) + { + /// new read until position is inside working buffer + file_offset_of_buffer_end = position; + } + else + { + /// new read until position is before working buffer begin + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Attempt to set read until position before already read data ({} > {}, info: {})", + position, + getPosition(), + impl->getInfoForLog()); + } + } + read_until_position = position; /// We must wait on future and reset the prefetch here, because otherwise there might be @@ -147,7 +177,7 @@ void AsynchronousBoundedReadBuffer::appendToPrefetchLog( }; if (prefetches_log) - prefetches_log->add(elem); + prefetches_log->add(std::move(elem)); } @@ -158,58 +188,55 @@ bool AsynchronousBoundedReadBuffer::nextImpl() chassert(file_offset_of_buffer_end <= impl->getFileSize()); - size_t size, offset; + IAsynchronousReader::Result result; if (prefetch_future.valid()) { - ProfileEventTimeIncrement watch(ProfileEvents::AsynchronousRemoteReadWaitMicroseconds); - CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; + { + ProfileEventTimeIncrement watch(ProfileEvents::AsynchronousRemoteReadWaitMicroseconds); + CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; - auto result = prefetch_future.get(); - size = result.size; - offset = result.offset; + result = prefetch_future.get(); + } prefetch_future = {}; prefetch_buffer.swap(memory); if (read_settings.enable_filesystem_read_prefetches_log) - { - appendToPrefetchLog(FilesystemPrefetchState::USED, size, result.execution_watch); - } + appendToPrefetchLog(FilesystemPrefetchState::USED, result.size, result.execution_watch); + last_prefetch_info = {}; ProfileEvents::increment(ProfileEvents::RemoteFSPrefetchedReads); - ProfileEvents::increment(ProfileEvents::RemoteFSPrefetchedBytes, size); + ProfileEvents::increment(ProfileEvents::RemoteFSPrefetchedBytes, result.size); } else { - ProfileEventTimeIncrement watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds); - chassert(memory.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); - std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore); + + { + ProfileEventTimeIncrement watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds); + result = readSync(memory.data(), memory.size()); + } ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); - ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedBytes, size); + ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedBytes, result.size); } - bytes_to_ignore = 0; - - chassert(size >= offset); - - size_t bytes_read = size - offset; + size_t bytes_read = result.size - result.offset; if (bytes_read) { /// Adjust the working buffer so that it ignores `offset` bytes. internal_buffer = Buffer(memory.data(), memory.data() + memory.size()); - working_buffer = Buffer(memory.data() + offset, memory.data() + size); + working_buffer = Buffer(memory.data() + result.offset, memory.data() + result.size); pos = working_buffer.begin(); } file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); + bytes_to_ignore = 0; /// In case of multiple files for the same file in clickhouse (i.e. 
log family) /// file_offset_of_buffer_end will not match getImplementationBufferOffset() /// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()] - chassert(file_offset_of_buffer_end >= impl->getFileOffsetOfBufferEnd()); chassert(file_offset_of_buffer_end <= impl->getFileSize()); return bytes_read; @@ -248,7 +275,6 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) { /// Position is still inside the buffer. /// Probably it is at the end of the buffer - then we will load data on the following 'next' call. - pos = working_buffer.end() - file_offset_of_buffer_end + new_pos; assert(pos >= working_buffer.begin()); assert(pos <= working_buffer.end()); @@ -285,7 +311,8 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) if (read_until_position && new_pos > *read_until_position) { - ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); + if (!impl->seekIsCheap()) + ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); file_offset_of_buffer_end = new_pos = *read_until_position; /// read_until_position is a non-included boundary. impl->seek(file_offset_of_buffer_end, SEEK_SET); return new_pos; @@ -303,7 +330,8 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) } else { - ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); + if (!impl->seekIsCheap()) + ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); file_offset_of_buffer_end = new_pos; impl->seek(file_offset_of_buffer_end, SEEK_SET); } diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.h b/src/Disks/IO/AsynchronousBoundedReadBuffer.h index c307bd8e214..c43b08ce2b0 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.h +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.h @@ -46,6 +46,8 @@ public: void setReadUntilEnd() override { return setReadUntilPosition(getFileSize()); } + size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } + off_t getPosition() override { return file_offset_of_buffer_end - available() + bytes_to_ignore; } private: @@ -88,7 +90,9 @@ private: int64_t size, const std::unique_ptr & execution_watch); - std::future asyncReadInto(char * data, size_t size, Priority priority); + std::future readAsync(char * data, size_t size, Priority priority); + + IAsynchronousReader::Result readSync(char * data, size_t size); void resetPrefetch(FilesystemPrefetchState state); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 6674eefeab1..1cfdd96b271 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -74,19 +74,22 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( } void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( - const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type) + const FileSegment & file_segment, CachedOnDiskReadBufferFromFile::ReadType type) { if (!cache_log) return; + const auto range = file_segment.range(); FilesystemCacheLogElement elem { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .query_id = query_id, .source_file_path = source_file_path, - .file_segment_range = { file_segment_range.left, file_segment_range.right }, + .file_segment_range = { range.left, range.right }, .requested_range = { first_offset, read_until_position }, - .file_segment_size = file_segment_range.size(), + .file_segment_key = file_segment.key().toString(), + 
.file_segment_offset = file_segment.offset(), + .file_segment_size = range.size(), .read_from_cache_attempted = true, .read_buffer_id = current_buffer_id, .profile_counters = std::make_shared( @@ -108,7 +111,7 @@ void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( break; } - cache_log->add(elem); + cache_log->add(std::move(elem)); } void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size) @@ -144,11 +147,19 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size) } CachedOnDiskReadBufferFromFile::ImplementationBufferPtr -CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) const +CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) { ProfileEventTimeIncrement watch(ProfileEvents::CachedReadBufferCreateBufferMicroseconds); auto path = file_segment.getPathInLocalCache(); + if (cache_file_reader) + { + chassert(cache_file_reader->getFileName() == path); + if (cache_file_reader->getFileName() == path) + return cache_file_reader; + + cache_file_reader.reset(); + } ReadSettings local_read_settings{settings}; /// Do not allow to use asynchronous version of LocalFSReadMethod. @@ -157,12 +168,12 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm if (use_external_buffer) local_read_settings.local_fs_buffer_size = 0; - auto buf = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead()); + cache_file_reader = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead()); - if (getFileSizeFromReadBuffer(*buf) == 0) + if (getFileSizeFromReadBuffer(*cache_file_reader) == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path); - return buf; + return cache_file_reader; } CachedOnDiskReadBufferFromFile::ImplementationBufferPtr @@ -204,7 +215,7 @@ CachedOnDiskReadBufferFromFile::getRemoteReadBuffer(FileSegment & file_segment, } else { - chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false)); + chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset()); } return remote_fs_segment_reader; @@ -237,12 +248,12 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co /// segment{k} state: DOWNLOADING /// cache: [______|___________ /// ^ - /// first_non_downloaded_offset (in progress) + /// current_write_offset (in progress) /// requested_range: [__________] /// ^ /// current_offset - size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset(true); - return first_non_downloaded_offset > current_offset; + size_t current_write_offset = file_segment.getCurrentWriteOffset(); + return current_write_offset > current_offset; } CachedOnDiskReadBufferFromFile::ImplementationBufferPtr @@ -282,7 +293,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s /// segment{k} state: DOWNLOADING /// cache: [______|___________ /// ^ - /// first_non_downloaded_offset (in progress) + /// current_write_offset (in progress) /// requested_range: [__________] /// ^ /// file_offset_of_buffer_end @@ -307,7 +318,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s /// segment{k} state: PARTIALLY_DOWNLOADED /// cache: [______|___________ /// ^ - /// first_non_downloaded_offset (in progress) + /// current_write_offset (in 
progress) /// requested_range: [__________] /// ^ /// file_offset_of_buffer_end @@ -324,7 +335,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s /// segment{k} /// cache: [______|___________ /// ^ - /// first_non_downloaded_offset + /// current_write_offset /// requested_range: [__________] /// ^ /// file_offset_of_buffer_end @@ -334,7 +345,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s return getCacheReadBuffer(file_segment); } - auto current_write_offset = file_segment.getCurrentWriteOffset(false); + auto current_write_offset = file_segment.getCurrentWriteOffset(); if (current_write_offset < file_offset_of_buffer_end) { /// segment{1} @@ -456,7 +467,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme if (bytes_to_predownload) { - const size_t current_write_offset = file_segment.getCurrentWriteOffset(false); + const size_t current_write_offset = file_segment.getCurrentWriteOffset(); read_buffer_for_file_segment->seek(current_write_offset, SEEK_SET); } else @@ -466,7 +477,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme chassert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end); } - const auto current_write_offset = file_segment.getCurrentWriteOffset(false); + const auto current_write_offset = file_segment.getCurrentWriteOffset(); if (current_write_offset != static_cast(read_buffer_for_file_segment->getPosition())) { throw Exception( @@ -495,9 +506,10 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() auto completed_range = current_file_segment->range(); if (cache_log) - appendFilesystemCacheLog(completed_range, read_type); + appendFilesystemCacheLog(*current_file_segment, read_type); chassert(file_offset_of_buffer_end > completed_range.right); + cache_file_reader.reset(); file_segments->popFront(); if (file_segments->empty()) @@ -518,7 +530,7 @@ CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile() { if (cache_log && file_segments && !file_segments->empty()) { - appendFilesystemCacheLog(file_segments->front().range(), read_type); + appendFilesystemCacheLog(file_segments->front(), read_type); } } @@ -544,9 +556,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) /// download from offset a'' < a', but return buffer from offset a'. 
LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId()); - /// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false)); - chassert(static_cast(implementation_buffer->getPosition()) == file_segment.getCurrentWriteOffset(false)); - size_t current_offset = file_segment.getCurrentWriteOffset(false); + /// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset()); + size_t current_offset = file_segment.getCurrentWriteOffset(); + chassert(static_cast(implementation_buffer->getPosition()) == current_offset); const auto & current_range = file_segment.range(); while (true) @@ -572,7 +584,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) "current download offset: {}, expected: {}, eof: {}", bytes_to_predownload, current_range.toString(), - file_segment.getCurrentWriteOffset(false), + file_segment.getCurrentWriteOffset(), file_offset_of_buffer_end, implementation_buffer->eof()); @@ -582,7 +594,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) { nextimpl_working_buffer_offset = implementation_buffer->offset(); - auto current_write_offset = file_segment.getCurrentWriteOffset(false); + auto current_write_offset = file_segment.getCurrentWriteOffset(); if (current_write_offset != static_cast(implementation_buffer->getPosition()) || current_write_offset != file_offset_of_buffer_end) { @@ -611,7 +623,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) { LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size); - chassert(file_segment.getCurrentWriteOffset(false) == static_cast(implementation_buffer->getPosition())); + chassert(file_segment.getCurrentWriteOffset() == static_cast(implementation_buffer->getPosition())); continue_predownload = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, file_segment); if (continue_predownload) @@ -692,38 +704,19 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded() { /// If current read_type is ReadType::CACHED and file segment is not DOWNLOADED, /// it means the following case, e.g. we started from CacheReadBuffer and continue with RemoteFSReadBuffer. - /// segment{k} - /// cache: [______|___________ + /// segment{k} + /// cache: [______|___________] /// ^ /// current_write_offset - /// requested_range: [__________] + /// requested_range: [__________ /// ^ /// file_offset_of_buffer_end - auto current_write_offset = file_segment.getCurrentWriteOffset(true); - bool cached_part_is_finished = current_write_offset == file_offset_of_buffer_end; - - LOG_TEST(log, "Current write offset: {}, file offset of buffer end: {}", current_write_offset, file_offset_of_buffer_end); - - if (cached_part_is_finished) + if (file_offset_of_buffer_end >= file_segment.getCurrentWriteOffset()) { - /// TODO: makes sense to reuse local file reader if we return here with CACHED read type again? implementation_buffer = getImplementationBuffer(file_segment); - return true; } - else if (current_write_offset < file_offset_of_buffer_end) - { - const auto path = file_segment.getPathInLocalCache(); - size_t file_size = 0; - if (fs::exists(path)) - file_size = fs::file_size(path); - - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Invariant failed. 
Expected {} >= {} (size on fs: {}, {})", - current_write_offset, file_offset_of_buffer_end, file_size, getInfoForLog()); - } } else if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE) { @@ -947,23 +940,6 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() { ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size); ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed); - - if (result) - { - const size_t new_file_offset = file_offset_of_buffer_end + size; - const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true); - if (new_file_offset > file_segment.range().right + 1 || new_file_offset > file_segment_write_offset) - { - auto file_segment_path = file_segment.getPathInLocalCache(); - throw Exception( - ErrorCodes::LOGICAL_ERROR, "Read unexpected size. " - "File size: {}, file segment path: {}, impl size: {}, impl path: {}" - "file segment info: {}", - fs::file_size(file_segment_path), file_segment_path, - implementation_buffer->getFileSize(), implementation_buffer->getFileName(), - file_segment.getInfoForLog()); - } - } } else { @@ -982,15 +958,15 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() bool success = file_segment.reserve(size); if (success) { - chassert(file_segment.getCurrentWriteOffset(false) == static_cast(implementation_buffer->getPosition())); + chassert(file_segment.getCurrentWriteOffset() == static_cast(implementation_buffer->getPosition())); success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, file_segment); if (success) { - chassert(file_segment.getCurrentWriteOffset(false) <= file_segment.range().right + 1); + chassert(file_segment.getCurrentWriteOffset() <= file_segment.range().right + 1); chassert( /* last_file_segment */file_segments->size() == 1 - || file_segment.getCurrentWriteOffset(false) == implementation_buffer->getFileOffsetOfBufferEnd()); + || file_segment.getCurrentWriteOffset() == implementation_buffer->getFileOffsetOfBufferEnd()); LOG_TEST(log, "Successfully written {} bytes", size); download_current_segment_succeeded = true; @@ -1032,7 +1008,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() file_offset_of_buffer_end += size; if (download_current_segment && download_current_segment_succeeded) - chassert(file_segment.getCurrentWriteOffset(false) >= file_offset_of_buffer_end); + chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end); chassert(file_offset_of_buffer_end <= read_until_position); } @@ -1081,7 +1057,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() current_read_range.toString(), file_offset_of_buffer_end, FileSegment::stateToString(file_segment.state()), - file_segment.getCurrentWriteOffset(false), + file_segment.getCurrentWriteOffset(), toString(read_type), read_until_position, first_offset, @@ -1176,6 +1152,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence) file_segments.reset(); implementation_buffer.reset(); initialized = false; + cache_file_reader.reset(); LOG_TEST(log, "Reset state for seek to position {}", new_pos); @@ -1211,6 +1188,7 @@ void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position) file_segments.reset(); implementation_buffer.reset(); initialized = false; + cache_file_reader.reset(); read_until_position = position; @@ -1227,13 +1205,6 @@ off_t CachedOnDiskReadBufferFromFile::getPosition() return file_offset_of_buffer_end - available(); } -void CachedOnDiskReadBufferFromFile::assertCorrectness() const -{ - if 
(!CachedObjectStorage::canUseReadThroughCache(settings) - && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage is not allowed (query_id: {})", query_id); -} - String CachedOnDiskReadBufferFromFile::getInfoForLog() { String current_file_segment_info; diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index b4e7701de75..0b9b01b8a94 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -64,7 +64,6 @@ private: using ImplementationBufferPtr = std::shared_ptr; void initialize(size_t offset, size_t size); - void assertCorrectness() const; /** * Return a list of file segments ordered in ascending order. This list represents @@ -76,7 +75,7 @@ private: ImplementationBufferPtr getReadBufferForFileSegment(FileSegment & file_segment); - ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const; + ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment); ImplementationBufferPtr getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_); @@ -90,7 +89,7 @@ private: bool completeFileSegmentAndGetNext(); - void appendFilesystemCacheLog(const FileSegment::Range & file_segment_range, ReadType read_type); + void appendFilesystemCacheLog(const FileSegment & file_segment, ReadType read_type); bool writeCache(char * data, size_t size, size_t offset, FileSegment & file_segment); @@ -110,7 +109,8 @@ private: ImplementationBufferCreator implementation_buffer_creator; /// Remote read buffer, which can only be owned by current buffer. - FileSegment::RemoteFileReaderPtr remote_file_reader; + ImplementationBufferPtr remote_file_reader; + ImplementationBufferPtr cache_file_reader; FileSegmentsHolderPtr file_segments; diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index 33d4ed7b3d7..48e12c7b9b9 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -70,7 +70,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset while (size > 0) { - size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(false); + size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(); if (available_size == 0) { completeFileSegment(); @@ -108,6 +108,10 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset data += size_to_write; } + size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(); + if (available_size == 0) + completeFileSegment(); + return true; } @@ -155,7 +159,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s return; auto file_segment_range = file_segment.range(); - size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize(false) - 1; + size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1; FilesystemCacheLogElement elem { @@ -165,13 +169,14 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s .file_segment_range = { file_segment_range.left, file_segment_right_bound }, .requested_range = {}, .cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE, + .file_segment_key = {}, .file_segment_size = file_segment_range.size(), .read_from_cache_attempted = false, 
.read_buffer_id = {}, .profile_counters = nullptr, }; - cache_log->add(elem); + cache_log->add(std::move(elem)); } void FileSegmentRangeWriter::completeFileSegment() @@ -195,15 +200,16 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile( const String & source_path_, const FileCache::Key & key_, const String & query_id_, - const WriteSettings & settings_) + const WriteSettings & settings_, + std::shared_ptr cache_log_) : WriteBufferFromFileDecorator(std::move(impl_)) , log(&Poco::Logger::get("CachedOnDiskWriteBufferFromFile")) , cache(cache_) , source_path(source_path_) , key(key_) , query_id(query_id_) - , enable_cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log) , throw_on_error_from_cache(settings_.throw_on_error_from_cache) + , cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log ? cache_log_ : nullptr) { } @@ -240,10 +246,6 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool t if (!cache_writer) { - std::shared_ptr cache_log; - if (enable_cache_log) - cache_log = Context::getGlobalContextInstance()->getFilesystemCacheLog(); - cache_writer = std::make_unique(cache.get(), key, cache_log, query_id, source_path); } diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h index 8d39b6eed42..6e2ff37a5c7 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h @@ -73,7 +73,8 @@ public: const String & source_path_, const FileCache::Key & key_, const String & query_id_, - const WriteSettings & settings_); + const WriteSettings & settings_, + std::shared_ptr cache_log_); void nextImpl() override; @@ -91,12 +92,11 @@ private: size_t current_download_offset = 0; const String query_id; - bool enable_cache_log; - bool throw_on_error_from_cache; bool cache_in_error_state_or_disabled = false; std::unique_ptr cache_writer; + std::shared_ptr cache_log; }; } diff --git a/src/Disks/IO/IOUringReader.cpp b/src/Disks/IO/IOUringReader.cpp index 7b68e0ee2de..f007cf00d50 100644 --- a/src/Disks/IO/IOUringReader.cpp +++ b/src/Disks/IO/IOUringReader.cpp @@ -62,8 +62,16 @@ IOUringReader::IOUringReader(uint32_t entries_) struct io_uring_params params = { + .sq_entries = 0, // filled by the kernel, initializing to silence warning .cq_entries = 0, // filled by the kernel, initializing to silence warning .flags = 0, + .sq_thread_cpu = 0, // Unused (IORING_SETUP_SQ_AFF isn't set). Silences warning + .sq_thread_idle = 0, // Unused (IORING_SETUP_SQPOL isn't set). Silences warning + .features = 0, // filled by the kernel, initializing to silence warning + .wq_fd = 0, // Unused (IORING_SETUP_ATTACH_WQ isn't set). Silences warning. + .resv = {0, 0, 0}, // "The resv array must be initialized to zero." 
+ .sq_off = {}, // filled by the kernel, initializing to silence warning + .cq_off = {}, // filled by the kernel, initializing to silence warning }; int ret = io_uring_queue_init_params(entries_, &ring, ¶ms); diff --git a/src/Disks/IO/IOUringReader.h b/src/Disks/IO/IOUringReader.h index 9b80ac6e5e0..b038b3acf7d 100644 --- a/src/Disks/IO/IOUringReader.h +++ b/src/Disks/IO/IOUringReader.h @@ -15,6 +15,10 @@ namespace Poco { class Logger; } namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} class Exception; @@ -76,6 +80,7 @@ public: inline bool isSupported() { return is_supported; } std::future submit(Request request) override; + Result execute(Request /* request */) override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `execute` not implemented for IOUringReader"); } void wait() override {} diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 129bb97be09..8de977ef876 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -102,6 +102,19 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() size_t bytes_read = 0; size_t sleep_time_with_backoff_milliseconds = 100; + + auto handle_exception = [&, this](const auto & e, size_t i) + { + LOG_INFO(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); + if (i + 1 == max_single_read_retries) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + initialized = false; + initialize(); + }; + for (size_t i = 0; i < max_single_read_retries; ++i) { try @@ -111,16 +124,9 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() read_settings.remote_throttler->add(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); break; } - catch (const Azure::Storage::StorageException & e) + catch (const Azure::Core::RequestFailedException & e) { - LOG_INFO(log, "Exception caught during Azure Read for file {} at attempt {}: {}", path, i, e.Message); - if (i + 1 == max_single_read_retries) - throw; - - sleepForMilliseconds(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; - initialized = false; - initialize(); + handle_exception(e, i); } } @@ -211,6 +217,17 @@ void ReadBufferFromAzureBlobStorage::initialize() blob_client = std::make_unique(blob_container_client->GetBlobClient(path)); size_t sleep_time_with_backoff_milliseconds = 100; + + auto handle_exception = [&, this](const auto & e, size_t i) + { + LOG_INFO(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + }; + for (size_t i = 0; i < max_single_download_retries; ++i) { try @@ -221,12 +238,7 @@ void ReadBufferFromAzureBlobStorage::initialize() } catch (const Azure::Core::RequestFailedException & e) { - LOG_INFO(log, "Exception caught during Azure Download for file {} at offset {} at attempt {} : {}", path, offset, i + 1, e.Message); - if (i + 1 == max_single_download_retries) - throw; - - sleepForMilliseconds(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; + handle_exception(e,i); } } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp 
b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 800cc0883e6..421a79d71cc 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -2,7 +2,6 @@ #include -#include #include #include #include @@ -75,7 +74,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c size_t current_read_until_position = read_until_position ? read_until_position : object.bytes_size; auto current_read_buffer_creator = [=, this]() { return read_buffer_creator(object_path, current_read_until_position); }; -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD if (with_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -109,29 +108,12 @@ void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() .source_file_path = current_object.remote_path, .file_segment_range = { 0, current_object.bytes_size }, .cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE, + .file_segment_key = {}, + .file_segment_offset = {}, .file_segment_size = current_object.bytes_size, .read_from_cache_attempted = false, }; - cache_log->add(elem); -} - -IAsynchronousReader::Result ReadBufferFromRemoteFSGather::readInto(char * data, size_t size, size_t offset, size_t ignore) -{ - /** - * Set `data` to current working and internal buffers. - * Internal buffer with size `size`. Working buffer with size 0. - */ - set(data, size); - - file_offset_of_buffer_end = offset; - bytes_to_ignore = ignore; - - const auto result = nextImpl(); - - if (result) - return { working_buffer.size(), BufferBase::offset(), nullptr }; - - return {0, 0, nullptr}; + cache_log->add(std::move(elem)); } void ReadBufferFromRemoteFSGather::initialize() @@ -203,39 +185,14 @@ bool ReadBufferFromRemoteFSGather::readImpl() { SwapHelper swap(*this, *current_buf); - bool result = false; - - /** - * Lazy seek is performed here. - * In asynchronous buffer when seeking to offset in range [pos, pos + min_bytes_for_seek] - * we save how many bytes need to be ignored (new_offset - position() bytes). - */ - if (bytes_to_ignore) - { - current_buf->ignore(bytes_to_ignore); - result = current_buf->hasPendingData(); - file_offset_of_buffer_end += bytes_to_ignore; - bytes_to_ignore = 0; - } - - if (!result) - result = current_buf->next(); - - if (blobs_to_read.size() == 1) - { - file_offset_of_buffer_end = current_buf->getFileOffsetOfBufferEnd(); - } - else - { - /// For log family engines there are multiple s3 files for the same clickhouse file - file_offset_of_buffer_end += current_buf->available(); - } - - /// Required for non-async reads. 
+ bool result = current_buf->next(); if (result) { - assert(current_buf->available()); + file_offset_of_buffer_end += current_buf->available(); nextimpl_working_buffer_offset = current_buf->offset(); + + chassert(current_buf->available()); + chassert(blobs_to_read.size() != 1 || file_offset_of_buffer_end == current_buf->getFileOffsetOfBufferEnd()); } return result; @@ -255,7 +212,6 @@ void ReadBufferFromRemoteFSGather::reset() current_object = {}; current_buf_idx = {}; current_buf.reset(); - bytes_to_ignore = 0; } off_t ReadBufferFromRemoteFSGather::seek(off_t offset, int whence) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 6488d532829..61381d63d67 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -40,15 +40,13 @@ public: void setReadUntilEnd() override { return setReadUntilPosition(getFileSize()); } - IAsynchronousReader::Result readInto(char * data, size_t size, size_t offset, size_t ignore) override; - size_t getFileSize() override { return getTotalSize(blobs_to_read); } size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } off_t seek(off_t offset, int whence) override; - off_t getPosition() override { return file_offset_of_buffer_end - available() + bytes_to_ignore; } + off_t getPosition() override { return file_offset_of_buffer_end - available(); } bool seekIsCheap() override { return !current_buf; } @@ -77,7 +75,6 @@ private: size_t read_until_position = 0; size_t file_offset_of_buffer_end = 0; - size_t bytes_to_ignore = 0; StoredObject current_object; size_t current_buf_idx = 0; diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 1f4818c8cb9..46d8c41ff78 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -55,7 +55,7 @@ std::unique_ptr ReadBufferFromWebServer::initialize() const auto & settings = context->getSettingsRef(); const auto & config = context->getConfigRef(); - Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 20), 0}; + Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}; auto res = std::make_unique( uri, diff --git a/src/Disks/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h index 4c55be29bf9..42bc9bf8bb4 100644 --- a/src/Disks/IO/ThreadPoolReader.h +++ b/src/Disks/IO/ThreadPoolReader.h @@ -8,6 +8,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} /** Perform reads from separate thread pool of specified size. * @@ -36,6 +40,8 @@ public: std::future submit(Request request) override; + Result execute(Request /* request */) override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `execute` not implemented for ThreadpoolReader"); } + void wait() override; /// pool automatically waits for all tasks in destructor. 
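The hunks just above and below share one refactoring: `IAsynchronousReader` implementations gain a synchronous `execute(Request)` entry point. Readers that have no synchronous path (IOUringReader, ThreadPoolReader) throw NOT_IMPLEMENTED from it, while ThreadPoolRemoteFSReader (next file) reimplements `submit()` as "run `execute()` on the thread pool", so the read logic is written once and reused by both entry points. A minimal, hedged sketch of that shape, with invented names (`IAsyncReader`, `AsyncOnlyReader`, `PooledReader`) standing in for the real ClickHouse types:

```
// Illustrative sketch only; not the real IAsynchronousReader hierarchy.
#include <cstddef>
#include <future>
#include <stdexcept>

struct Request { char * buf = nullptr; size_t size = 0; size_t offset = 0; size_t ignore = 0; };
struct Result { size_t size = 0; size_t offset = 0; };

struct IAsyncReader
{
    virtual ~IAsyncReader() = default;
    /// Asynchronous entry point: returns a future with the read result.
    virtual std::future<Result> submit(Request request) = 0;
    /// Synchronous entry point: perform the read on the calling thread.
    virtual Result execute(Request request) = 0;
};

/// A reader that only works asynchronously refuses the synchronous path,
/// mirroring the NOT_IMPLEMENTED stubs added in the hunks above.
struct AsyncOnlyReader : IAsyncReader
{
    std::future<Result> submit(Request) override { return {}; }  // not exercised in this sketch
    Result execute(Request) override { throw std::runtime_error("execute() not implemented"); }
};

/// A pool-based reader implements submit() as "run execute() on a worker thread".
struct PooledReader : IAsyncReader
{
    Result execute(Request request) override
    {
        Result r;
        r.size = request.size;      // pretend the whole requested range was read
        r.offset = request.ignore;  // report how many leading bytes the caller should skip
        return r;
    }

    std::future<Result> submit(Request request) override
    {
        return std::async(std::launch::async, [this, request] { return execute(request); });
    }
};

int main()
{
    PooledReader reader;
    Request req{nullptr, 4096, 0, 0};
    Result r = reader.submit(req).get();  // same result as calling reader.execute(req) directly
    return r.size == 4096 ? 0 : 1;
}
```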
diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 988a445cfd0..0ec5e0fd6c1 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -56,14 +56,10 @@ namespace }; } -IAsynchronousReader::Result RemoteFSFileDescriptor::readInto(char * data, size_t size, size_t offset, size_t ignore) -{ - return reader.readInto(data, size, offset, ignore); -} - - ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_) - : pool(std::make_unique(CurrentMetrics::ThreadPoolRemoteFSReaderThreads, CurrentMetrics::ThreadPoolRemoteFSReaderThreadsActive, pool_size, pool_size, queue_size_)) + : pool(std::make_unique(CurrentMetrics::ThreadPoolRemoteFSReaderThreads, + CurrentMetrics::ThreadPoolRemoteFSReaderThreadsActive, + pool_size, pool_size, queue_size_)) { } @@ -71,23 +67,46 @@ ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queu std::future ThreadPoolRemoteFSReader::submit(Request request) { ProfileEventTimeIncrement elapsed(ProfileEvents::ThreadpoolReaderSubmit); - return scheduleFromThreadPool([request]() -> Result + return scheduleFromThreadPool([request, this]() -> Result { return execute(request); }, + *pool, + "VFSRead", + request.priority); +} + +IAsynchronousReader::Result ThreadPoolRemoteFSReader::execute(Request request) +{ + CurrentMetrics::Increment metric_increment{CurrentMetrics::RemoteRead}; + + auto * fd = assert_cast(request.descriptor.get()); + auto & reader = fd->getReader(); + + auto read_counters = fd->getReadCounters(); + std::optional increment = read_counters ? std::optional(read_counters) : std::nullopt; + + auto watch = std::make_unique(CLOCK_REALTIME); + + reader.set(request.buf, request.size); + reader.seek(request.offset, SEEK_SET); + if (request.ignore) + reader.ignore(request.ignore); + + bool result = reader.available(); + if (!result) + result = reader.next(); + + watch->stop(); + ProfileEvents::increment(ProfileEvents::ThreadpoolReaderTaskMicroseconds, watch->elapsedMicroseconds()); + + IAsynchronousReader::Result read_result; + if (result) { - CurrentMetrics::Increment metric_increment{CurrentMetrics::RemoteRead}; - auto * remote_fs_fd = assert_cast(request.descriptor.get()); + read_result.size = reader.buffer().size(); + read_result.offset = reader.offset(); + ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, read_result.size); + } - auto async_read_counters = remote_fs_fd->getReadCounters(); - std::optional increment = async_read_counters ? 
std::optional(async_read_counters) : std::nullopt; - - auto watch = std::make_unique(CLOCK_REALTIME); - Result result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore); - watch->stop(); - - ProfileEvents::increment(ProfileEvents::ThreadpoolReaderTaskMicroseconds, watch->elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, result.size); - - return Result{ .size = result.size, .offset = result.offset, .execution_watch = std::move(watch) }; - }, *pool, "VFSRead", request.priority); + read_result.execution_watch = std::move(watch); + return read_result; } void ThreadPoolRemoteFSReader::wait() diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index 506d77a64ef..192a12370e3 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -16,6 +16,7 @@ public: ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_); std::future submit(Request request) override; + IAsynchronousReader::Result execute(Request request) override; void wait() override; @@ -27,17 +28,18 @@ class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor { public: explicit RemoteFSFileDescriptor( - ReadBuffer & reader_, + SeekableReadBuffer & reader_, std::shared_ptr async_read_counters_) : reader(reader_) , async_read_counters(async_read_counters_) {} - IAsynchronousReader::Result readInto(char * data, size_t size, size_t offset, size_t ignore = 0); + SeekableReadBuffer & getReader() { return reader; } std::shared_ptr getReadCounters() const { return async_read_counters; } private: - ReadBuffer & reader; + /// Reader is used for reading only by RemoteFSFileDescriptor. 
+ SeekableReadBuffer & reader; std::shared_ptr async_read_counters; }; diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index b5d296bd865..60bc04f5f95 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -62,10 +62,6 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, func(); break; } - catch (const Azure::Core::Http::TransportException & e) - { - handle_exception(e, i); - } catch (const Azure::Core::RequestFailedException & e) { handle_exception(e, i); diff --git a/src/Disks/IO/WriteBufferFromTemporaryFile.cpp b/src/Disks/IO/WriteBufferFromTemporaryFile.cpp index 5bfbb2fa440..de494c03789 100644 --- a/src/Disks/IO/WriteBufferFromTemporaryFile.cpp +++ b/src/Disks/IO/WriteBufferFromTemporaryFile.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes } WriteBufferFromTemporaryFile::WriteBufferFromTemporaryFile(TemporaryFileOnDiskHolder && tmp_file_) - : WriteBufferFromFile(tmp_file_->getPath(), DBMS_DEFAULT_BUFFER_SIZE, O_RDWR | O_TRUNC | O_CREAT, /* throttler= */ {}, 0600) + : WriteBufferFromFile(tmp_file_->getAbsolutePath(), DBMS_DEFAULT_BUFFER_SIZE, O_RDWR | O_TRUNC | O_CREAT, /* throttler= */ {}, 0600) , tmp_file(std::move(tmp_file_)) { } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index dbb41851053..f76fbd45736 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -69,7 +69,7 @@ private: static_cast(blob.BlobSize), Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( - blob.Details.LastModified.time_since_epoch()).count()), + static_cast(blob.Details.LastModified).time_since_epoch()).count()), {}}); } @@ -162,7 +162,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith static_cast(blob.BlobSize), Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( - blob.Details.LastModified.time_since_epoch()).count()), + static_cast(blob.Details.LastModified).time_since_epoch()).count()), {}}); } @@ -350,7 +350,7 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c for (const auto & [key, value] : properties.Metadata) (*result.attributes)[key] = value; } - result.last_modified.emplace(properties.LastModified.time_since_epoch().count()); + result.last_modified.emplace(static_cast(properties.LastModified).time_since_epoch().count()); return result; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 3e7c4d12c42..0da572a06ab 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -16,11 +16,6 @@ namespace fs = std::filesystem; namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - CachedObjectStorage::CachedObjectStorage( ObjectStoragePtr object_storage_, FileCachePtr cache_, @@ -79,8 +74,6 @@ std::unique_ptr CachedObjectStorage::readObjects( /// NO std::optional read_hint, std::optional file_size) const { - if (objects.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Received empty list of objects to read"); return object_storage->readObjects(objects, patchSettings(read_settings), read_hint, file_size); } @@ -120,7 +113,8 @@ std::unique_ptr CachedObjectStorage::writeObject( /// N 
implementation_buffer->getFileName(), key, CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() ? std::string(CurrentThread::getQueryId()) : "", - modified_write_settings); + modified_write_settings, + Context::getGlobalContextInstance()->getFilesystemCacheLog()); } return implementation_buffer; diff --git a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp index 2b40fa9c21b..2f80b4c9efd 100644 --- a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp +++ b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -40,10 +41,24 @@ void registerDiskCache(DiskFactory & factory, bool /* global_skip_access_check * FileCacheSettings file_cache_settings; file_cache_settings.loadFromConfig(config, config_prefix); - if (file_cache_settings.base_path.empty()) - file_cache_settings.base_path = fs::path(context->getPath()) / "disks" / name / "cache/"; - else if (fs::path(file_cache_settings.base_path).is_relative()) - file_cache_settings.base_path = fs::path(context->getPath()) / "caches" / file_cache_settings.base_path; + auto config_fs_caches_dir = context->getFilesystemCachesPath(); + if (config_fs_caches_dir.empty()) + { + if (fs::path(file_cache_settings.base_path).is_relative()) + file_cache_settings.base_path = fs::path(context->getPath()) / "caches" / file_cache_settings.base_path; + } + else + { + if (fs::path(file_cache_settings.base_path).is_relative()) + file_cache_settings.base_path = fs::path(config_fs_caches_dir) / file_cache_settings.base_path; + + if (!pathStartsWith(file_cache_settings.base_path, config_fs_caches_dir)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Filesystem cache path {} must lie inside default filesystem cache path `{}`", + file_cache_settings.base_path, config_fs_caches_dir); + } + } auto cache = FileCacheFactory::instance().getOrCreate(name, file_cache_settings); auto disk = disk_it->second; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 762151b3808..466a1d3d5dd 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -3,9 +3,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -64,6 +66,8 @@ DiskObjectStorage::DiskObjectStorage( , metadata_storage(std::move(metadata_storage_)) , object_storage(std::move(object_storage_)) , send_metadata(config.getBool(config_prefix + ".send_metadata", false)) + , read_resource_name(config.getString(config_prefix + ".read_resource", "")) + , write_resource_name(config.getString(config_prefix + ".write_resource", "")) , metadata_helper(std::make_unique(this, ReadSettings{})) {} @@ -479,15 +483,48 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() config_prefix); } +template +static inline Settings updateResourceLink(const Settings & settings, const String & resource_name) +{ + if (resource_name.empty()) + return settings; + if (auto query_context = CurrentThread::getQueryContext()) + { + Settings result(settings); + result.resource_link = query_context->getWorkloadClassifier()->get(resource_name); + return result; + } + return settings; +} + +String DiskObjectStorage::getReadResourceName() const +{ + std::unique_lock lock(resource_mutex); + return read_resource_name; +} + +String DiskObjectStorage::getWriteResourceName() const +{ + std::unique_lock lock(resource_mutex); + 
return write_resource_name; +} + std::unique_ptr DiskObjectStorage::readFile( const String & path, const ReadSettings & settings, std::optional read_hint, std::optional file_size) const { + auto storage_objects = metadata_storage->getStorageObjects(path); + + const bool file_can_be_empty = !file_size.has_value() || *file_size == 0; + + if (storage_objects.empty() && file_can_be_empty) + return std::make_unique(); + return object_storage->readObjects( - metadata_storage->getStorageObjects(path), - object_storage->getAdjustedSettingsFromMetadataFile(settings, path), + storage_objects, + object_storage->getAdjustedSettingsFromMetadataFile(updateResourceLink(settings, getReadResourceName()), path), read_hint, file_size); } @@ -501,13 +538,11 @@ std::unique_ptr DiskObjectStorage::writeFile( LOG_TEST(log, "Write file: {}", path); auto transaction = createObjectStorageTransaction(); - auto result = transaction->writeFile( + return transaction->writeFile( path, buf_size, mode, - object_storage->getAdjustedSettingsFromMetadataFile(settings, path)); - - return result; + object_storage->getAdjustedSettingsFromMetadataFile(updateResourceLink(settings, getWriteResourceName()), path)); } Strings DiskObjectStorage::getBlobPath(const String & path) const @@ -537,6 +572,15 @@ void DiskObjectStorage::applyNewSettings( /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name const auto config_prefix = "storage_configuration.disks." + name; object_storage->applyNewSettings(config, config_prefix, context_); + + { + std::unique_lock lock(resource_mutex); + if (String new_read_resource_name = config.getString(config_prefix + ".read_resource", ""); new_read_resource_name != read_resource_name) + read_resource_name = new_read_resource_name; + if (String new_write_resource_name = config.getString(config_prefix + ".write_resource", ""); new_write_resource_name != write_resource_name) + write_resource_name = new_write_resource_name; + } + IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 6b05d5f27e7..b7ade2e43cf 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -189,7 +189,7 @@ public: /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Add a cache layer. /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// There can be any number of cache layers: @@ -212,6 +212,9 @@ private: /// execution. 
DiskTransactionPtr createObjectStorageTransaction(); + String getReadResourceName() const; + String getWriteResourceName() const; + const String object_storage_root_path; Poco::Logger * log; @@ -226,6 +229,10 @@ private: const bool send_metadata; + mutable std::mutex resource_mutex; + String read_resource_name; + String write_resource_name; + std::unique_ptr metadata_helper; }; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 0ae577602b1..fd01caacd25 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -5,8 +5,10 @@ #include #include #include +#include #include +#include namespace DB { @@ -156,14 +158,13 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperation { - RemoveBatchRequest remove_paths; - bool keep_all_batch_data; - NameSet file_names_remove_metadata_only; + const RemoveBatchRequest remove_paths; + const bool keep_all_batch_data; + const NameSet file_names_remove_metadata_only; + std::vector paths_removed_with_objects; std::vector objects_to_remove; - bool remove_from_cache = false; - RemoveManyObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, @@ -203,6 +204,7 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename())) { objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)}); + paths_removed_with_objects.push_back(path); } } catch (const Exception & e) @@ -213,6 +215,12 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati || e.code() == ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == ErrorCodes::CANNOT_OPEN_FILE) { + LOG_DEBUG( + &Poco::Logger::get("RemoveManyObjectStorageOperation"), + "Can't read metadata because of an exception. Just remove it from the filesystem. 
Path: {}, exception: {}", + metadata_storage.getPath() + path, + e.message()); + tx->unlinkFile(path); } else @@ -238,16 +246,31 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati /// TL;DR Don't pay any attention to 404 status code if (!remove_from_remote.empty()) object_storage.removeObjectsIfExist(remove_from_remote); + + if (!keep_all_batch_data) + { + LOG_DEBUG( + &Poco::Logger::get("RemoveManyObjectStorageOperation"), + "metadata and objects were removed for [{}], " + "only metadata were removed for [{}].", + boost::algorithm::join(paths_removed_with_objects, ", "), + boost::algorithm::join(file_names_remove_metadata_only, ", ")); + } } }; struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation { - std::string path; + /// path inside disk with metadata + const std::string path; + const bool keep_all_batch_data; + /// paths inside the 'this->path' + const NameSet file_names_remove_metadata_only; + + /// map from local_path to its remote objects with hardlinks counter + /// local_path is the path inside 'this->path' std::unordered_map objects_to_remove_by_path; - bool keep_all_batch_data; - NameSet file_names_remove_metadata_only; RemoveRecursiveObjectStorageOperation( IObjectStorage & object_storage_, @@ -274,11 +297,16 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp { try { + chassert(path_to_remove.starts_with(path)); + auto rel_path = String(fs::relative(fs::path(path_to_remove), fs::path(path))); + auto objects_paths = metadata_storage.getStorageObjects(path_to_remove); auto unlink_outcome = tx->unlinkMetadata(path_to_remove); - if (unlink_outcome) + + if (unlink_outcome && !file_names_remove_metadata_only.contains(rel_path)) { - objects_to_remove_by_path[path_to_remove] = ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)}; + objects_to_remove_by_path[std::move(rel_path)] + = ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)}; } } catch (const Exception & e) @@ -320,25 +348,38 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp void undo() override { - } void finalize() override { if (!keep_all_batch_data) { + std::vector total_removed_paths; + total_removed_paths.reserve(objects_to_remove_by_path.size()); + StoredObjects remove_from_remote; for (auto && [local_path, objects_to_remove] : objects_to_remove_by_path) { - if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) + chassert(!file_names_remove_metadata_only.contains(local_path)); + if (objects_to_remove.unlink_outcome->num_hardlinks == 0) { - if (objects_to_remove.unlink_outcome->num_hardlinks == 0) - std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote)); + std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote)); + total_removed_paths.push_back(local_path); } } + /// Read comment inside RemoveObjectStorageOperation class /// TL;DR Don't pay any attention to 404 status code object_storage.removeObjectsIfExist(remove_from_remote); + + LOG_DEBUG( + &Poco::Logger::get("RemoveRecursiveObjectStorageOperation"), + "Recursively remove path {}: " + "metadata and objects were removed for [{}], " + "only metadata were removed for [{}].", + path, + boost::algorithm::join(total_removed_paths, ", "), + boost::algorithm::join(file_names_remove_metadata_only, ", ")); } } }; @@ -769,8 +810,11 @@ void 
DiskObjectStorageTransaction::createFile(const std::string & path) })); } -void DiskObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path) +void DiskObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) { + /// NOTE: For native copy we can ignore throttling, so no need to use WriteSettings + UNUSED(settings); + operations_to_execute.emplace_back( std::make_unique(object_storage, metadata_storage, from_file_path, to_file_path)); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h index a4cb0ed3739..8ce10dad212 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h @@ -86,7 +86,7 @@ public: void createFile(const String & path) override; - void copyFile(const std::string & from_file_path, const std::string & to_file_path) override; + void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) override; /// writeFile is a difficult function for transactions. /// Now it's almost noop because metadata added to transaction in finalize method diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index e72e7028c4b..96ff0a91564 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -54,6 +54,7 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) std::move(hdfs_storage), config, config_prefix); + disk->startup(context, skip_access_check); return disk; diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 264c481ee08..6b75e157dee 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -22,7 +22,14 @@ namespace ErrorCodes } class IMetadataStorage; -struct UnlinkMetadataFileOperationOutcome; + +/// Return the result of operation to the caller. +/// It is used in `IDiskObjectStorageOperation::finalize` after metadata transaction executed to make decision on blob removal. +struct UnlinkMetadataFileOperationOutcome +{ + UInt32 num_hardlinks = std::numeric_limits::max(); +}; + using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; /// Tries to provide some "transactions" interface, which allow diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index 4662ebc3423..ccb77f6ae7b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -244,15 +244,6 @@ private: std::unique_ptr write_operation; }; -/// Return the result of operation to the caller. -/// It is used in `IDiskObjectStorageOperation::finalize` after metadata transaction executed to make decision on blob removal. 
-struct UnlinkMetadataFileOperationOutcome -{ - UInt32 num_hardlinks = std::numeric_limits::max(); -}; - -using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; - struct UnlinkMetadataFileOperation final : public IMetadataOperation { const UnlinkMetadataFileOperationOutcomePtr outcome = std::make_shared(); diff --git a/src/Disks/ObjectStorages/S3/ProxyConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyConfiguration.h deleted file mode 100644 index fd2761c2cba..00000000000 --- a/src/Disks/ObjectStorages/S3/ProxyConfiguration.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - -namespace DB::S3 -{ -class ProxyConfiguration -{ -public: - virtual ~ProxyConfiguration() = default; - /// Returns proxy configuration on each HTTP request. - virtual ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0; - virtual void errorReport(const ClientConfigurationPerRequest & config) = 0; -}; - -} - -#endif diff --git a/src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp deleted file mode 100644 index 7c7bc7966ea..00000000000 --- a/src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "ProxyListConfiguration.h" - -#if USE_AWS_S3 - -#include -#include - -namespace DB::S3 -{ -ProxyListConfiguration::ProxyListConfiguration(std::vector proxies_) : proxies(std::move(proxies_)), access_counter(0) -{ -} - - -ClientConfigurationPerRequest ProxyListConfiguration::getConfiguration(const Aws::Http::HttpRequest &) -{ - /// Avoid atomic increment if number of proxies is 1. - size_t index = proxies.size() > 1 ? (access_counter++) % proxies.size() : 0; - - ClientConfigurationPerRequest cfg; - cfg.proxy_scheme = Aws::Http::SchemeMapper::FromString(proxies[index].getScheme().c_str()); - cfg.proxy_host = proxies[index].getHost(); - cfg.proxy_port = proxies[index].getPort(); - - LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}", proxies[index].toString()); - - return cfg; -} - -} - -#endif diff --git a/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h deleted file mode 100644 index 14fac8baff5..00000000000 --- a/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include // for std::atomic - -#include "ProxyConfiguration.h" - -namespace DB::S3 -{ -/** - * For each request to S3 it chooses a proxy from the specified list using round-robin strategy. - */ -class ProxyListConfiguration : public ProxyConfiguration -{ -public: - explicit ProxyListConfiguration(std::vector proxies_); - ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override; - void errorReport(const ClientConfigurationPerRequest &) override {} - -private: - /// List of configured proxies. - const std::vector proxies; - /// Access counter to get proxy using round-robin strategy. 
- std::atomic access_counter; -}; - -} - -#endif diff --git a/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h deleted file mode 100644 index d6d7456a6ac..00000000000 --- a/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include "ProxyConfiguration.h" - -#include - -namespace DB::S3 -{ -/** - * Proxy configuration where proxy host is obtained each time from specified endpoint. - * For each request to S3 it makes GET request to specified endpoint URL and reads proxy host from a response body. - * Specified scheme and port added to obtained proxy host to form completed proxy URL. - */ -class ProxyResolverConfiguration : public ProxyConfiguration -{ -public: - ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_); - ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override; - void errorReport(const ClientConfigurationPerRequest & config) override; - -private: - /// Endpoint to obtain a proxy host. - const Poco::URI endpoint; - /// Scheme for obtained proxy. - const String proxy_scheme; - /// Port for obtained proxy. - const unsigned proxy_port; - - std::mutex cache_mutex; - bool cache_valid = false; - std::chrono::time_point cache_timestamp; - const std::chrono::seconds cache_ttl{0}; - ClientConfigurationPerRequest cached_config; -}; - -} - -#endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e46ca3d0828..0d9670efebe 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -135,7 +135,7 @@ private: return result; } - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } @@ -320,7 +320,7 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis throwIfUnexpectedError(outcome, if_exists); - LOG_TRACE(log, "Object with path {} was removed from S3", object.remote_path); + LOG_DEBUG(log, "Object with path {} was removed from S3", object.remote_path); } void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_exists) @@ -368,7 +368,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e throwIfUnexpectedError(outcome, if_exists); - LOG_TRACE(log, "Objects with paths [{}] were removed from S3", keys); + LOG_DEBUG(log, "Objects with paths [{}] were removed from S3", keys); } } } @@ -431,11 +431,11 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT /// Shortcut for S3 if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) { - auto client_ptr = clients.get()->client; + auto clients_ = clients.get(); auto settings_ptr = s3_settings.get(); - auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); + auto size = S3::getObjectSize(*clients_->client, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto 
scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, + copyS3File(clients_->client, clients_->client_with_long_timeout, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } else @@ -447,11 +447,11 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT void S3ObjectStorage::copyObject( // NOLINT const StoredObject & object_from, const StoredObject & object_to, std::optional object_to_attributes) { - auto client_ptr = clients.get()->client; + auto clients_ = clients.get(); auto settings_ptr = s3_settings.get(); - auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); + auto size = S3::getObjectSize(*clients_->client, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, + copyS3File(clients_->client, clients_->client_with_long_timeout, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 0bd35c07a4b..06e79193c3e 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -17,9 +18,6 @@ #include #include -#include -#include -#include #include #include #include @@ -27,11 +25,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); @@ -44,76 +37,17 @@ std::unique_ptr getSettings(const Poco::Util::AbstractC config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); } -std::shared_ptr getProxyResolverConfiguration( - const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config) -{ - auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint")); - auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme"); - if (proxy_scheme != "http" && proxy_scheme != "https") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy resolver config: {}", proxy_scheme); - auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); - auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10); - - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", - endpoint.toString(), proxy_scheme, proxy_port); - - return std::make_shared(endpoint, proxy_scheme, proxy_port, cache_ttl); -} - -std::shared_ptr getProxyListConfiguration( - const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config) -{ - std::vector keys; - proxy_config.keys(prefix, keys); - - std::vector proxies; - for (const auto & key : keys) - if (startsWith(key, "uri")) - { - Poco::URI proxy_uri(proxy_config.getString(prefix + "." 
+ key)); - - if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy uri: {}", proxy_uri.toString()); - if (proxy_uri.getHost().empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty host in proxy uri: {}", proxy_uri.toString()); - - proxies.push_back(proxy_uri); - - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString()); - } - - if (!proxies.empty()) - return std::make_shared(proxies); - - return nullptr; -} - -std::shared_ptr getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config) -{ - if (!config.has(prefix + ".proxy")) - return nullptr; - - std::vector config_keys; - config.keys(prefix + ".proxy", config_keys); - - if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver")) - { - if (resolver_configs > 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple proxy resolver configurations aren't allowed"); - - return getProxyResolverConfiguration(prefix + ".proxy.resolver", config); - } - - return getProxyListConfiguration(prefix + ".proxy", config); -} - - std::unique_ptr getClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings) { + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + S3::URI uri(endpoint); + if (!uri.key.ends_with('/')) + uri.key.push_back('/'); + S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), context->getRemoteHostFilter(), @@ -121,26 +55,26 @@ std::unique_ptr getClient( context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ true, settings.request_settings.get_request_throttler, - settings.request_settings.put_request_throttler); - - String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - S3::URI uri(endpoint); - if (uri.key.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); + settings.request_settings.put_request_throttler, + uri.uri.getScheme()); client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; - client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", 10000); + client_configuration.http_keep_alive_timeout_ms + = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); client_configuration.wait_on_pool_size_limit = false; - auto proxy_config = getProxyConfiguration(config_prefix, config); + /* + * Override proxy configuration for backwards compatibility with old configuration format. 
+ * */ + auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(config_prefix, config); if (proxy_config) { client_configuration.per_request_configuration - = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); }; + = [proxy_config]() { return proxy_config->resolve(); }; client_configuration.error_report = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; } diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index d1264affaea..69bdfe01a36 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -104,12 +104,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); S3::URI uri(endpoint); - - if (uri.key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString()); - - if (uri.key.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); + if (!uri.key.ends_with('/')) + uri.key.push_back('/'); S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); std::shared_ptr s3_storage; diff --git a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp index bc6c17863ef..442a399fc78 100644 --- a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp @@ -54,6 +54,7 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check) object_storage, config, config_prefix); + disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp index 8e5c8bcebbd..06d7da4af58 100644 --- a/src/Disks/TemporaryFileOnDisk.cpp +++ b/src/Disks/TemporaryFileOnDisk.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -41,20 +40,12 @@ TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, const String & p ProfileEvents::increment(ProfileEvents::ExternalProcessingFilesTotal); - /// Do not use default temporaty root path `/tmp/tmpXXXXXX`. - /// The `dummy_prefix` is used to know what to replace with the real prefix. - String dummy_prefix = "a/"; - relative_path = Poco::TemporaryFile::tempName(dummy_prefix); - dummy_prefix += "tmp"; - /// a/tmpXXXXX -> XXXXX - assert(relative_path.starts_with(dummy_prefix)); - relative_path.replace(0, dummy_prefix.length(), prefix); - - if (relative_path.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary file name is empty"); + /// A disk can be remote and shared between multiple replicas. + /// That's why we must not use Poco::TemporaryFile::tempName() here (Poco::TemporaryFile::tempName() can return the same names for different processes on different nodes). 
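+    /// A prefix plus a freshly generated UUIDv4 is unique with overwhelming probability,
+    /// so two replicas sharing one remote disk cannot pick the same temporary file name.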
+ relative_path = prefix + toString(UUIDHelpers::generateV4()); } -String TemporaryFileOnDisk::getPath() const +String TemporaryFileOnDisk::getAbsolutePath() const { return std::filesystem::path(disk->getPath()) / relative_path; } diff --git a/src/Disks/TemporaryFileOnDisk.h b/src/Disks/TemporaryFileOnDisk.h index bd82b9744ea..cccfc82cf9e 100644 --- a/src/Disks/TemporaryFileOnDisk.h +++ b/src/Disks/TemporaryFileOnDisk.h @@ -22,7 +22,10 @@ public: ~TemporaryFileOnDisk(); DiskPtr getDisk() const { return disk; } - String getPath() const; + /// Return absolute path (disk + relative_path) + String getAbsolutePath() const; + /// Return relative path (without disk) + const String & getRelativePath() const { return relative_path; } private: DiskPtr disk; diff --git a/src/Disks/getDiskConfigurationFromAST.cpp b/src/Disks/getDiskConfigurationFromAST.cpp index 4b1323b4db8..76a257d3b52 100644 --- a/src/Disks/getDiskConfigurationFromAST.cpp +++ b/src/Disks/getDiskConfigurationFromAST.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes message.empty() ? "" : ": " + message); } -Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::string & root_name, const ASTs & disk_args, ContextPtr context) +Poco::AutoPtr getDiskConfigurationFromASTImpl(const ASTs & disk_args, ContextPtr context) { if (disk_args.empty()) throwBadConfiguration("expected non-empty list of arguments"); @@ -39,8 +39,6 @@ Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::st Poco::AutoPtr xml_document(new Poco::XML::Document()); Poco::AutoPtr root(xml_document->createElement("disk")); xml_document->appendChild(root); - Poco::AutoPtr disk_configuration(xml_document->createElement(root_name)); - root->appendChild(disk_configuration); for (const auto & arg : disk_args) { @@ -62,7 +60,7 @@ Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::st const std::string & key = key_identifier->name(); Poco::AutoPtr key_element(xml_document->createElement(key)); - disk_configuration->appendChild(key_element); + root->appendChild(key_element); if (!function_args[1]->as() && !function_args[1]->as()) throwBadConfiguration("expected values to be literals or identifiers"); @@ -75,9 +73,9 @@ Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::st return xml_document; } -DiskConfigurationPtr getDiskConfigurationFromAST(const std::string & root_name, const ASTs & disk_args, ContextPtr context) +DiskConfigurationPtr getDiskConfigurationFromAST(const ASTs & disk_args, ContextPtr context) { - auto xml_document = getDiskConfigurationFromASTImpl(root_name, disk_args, context); + auto xml_document = getDiskConfigurationFromASTImpl(disk_args, context); Poco::AutoPtr conf(new Poco::Util::XMLConfiguration()); conf->load(xml_document); return conf; diff --git a/src/Disks/getDiskConfigurationFromAST.h b/src/Disks/getDiskConfigurationFromAST.h index 5697955e914..f23fb37b9dc 100644 --- a/src/Disks/getDiskConfigurationFromAST.h +++ b/src/Disks/getDiskConfigurationFromAST.h @@ -14,19 +14,19 @@ using DiskConfigurationPtr = Poco::AutoPtr; /** * Transform a list of pairs ( key1=value1, key2=value2, ... ), where keys and values are ASTLiteral or ASTIdentifier * into - * + * * value1 * value2 * ... - * + * * * Used in case disk configuration is passed via AST when creating * a disk object on-the-fly without any configuration file. 
*/ -DiskConfigurationPtr getDiskConfigurationFromAST(const std::string & root_name, const ASTs & disk_args, ContextPtr context); +DiskConfigurationPtr getDiskConfigurationFromAST(const ASTs & disk_args, ContextPtr context); /// The same as above function, but return XML::Document for easier modification of result configuration. -[[ maybe_unused ]] Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::string & root_name, const ASTs & disk_args, ContextPtr context); +[[ maybe_unused ]] Poco::AutoPtr getDiskConfigurationFromASTImpl(const ASTs & disk_args, ContextPtr context); /* * A reverse function. diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index a9a0e972bd1..da318303f62 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -26,8 +26,16 @@ namespace { std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { + const auto * function_args_expr = assert_cast(function.arguments.get()); + const auto & function_args = function_args_expr->children; + auto config = getDiskConfigurationFromAST(function_args, context); + std::string disk_name; - if (function.name == "disk") + if (config->has("name")) + { + disk_name = config->getString("name"); + } + else { /// We need a unique name for a created custom disk, but it needs to be the same /// after table is reattached or server is restarted, so take a hash of the disk @@ -36,21 +44,9 @@ namespace disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); } - else - { - static constexpr std::string_view custom_disk_prefix = "disk_"; - - if (function.name.size() <= custom_disk_prefix.size() || !function.name.starts_with(custom_disk_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid disk name: {}", function.name); - - disk_name = function.name.substr(custom_disk_prefix.size()); - } auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { - const auto * function_args_expr = assert_cast(function.arguments.get()); - const auto & function_args = function_args_expr->children; - auto config = getDiskConfigurationFromAST(disk_name, function_args, context); - auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map); + auto disk = DiskFactory::instance().create(disk_name, *config, "", context, disks_map); /// Mark that disk can be used without storage policy. 
disk->markDiskAsCustom(); return disk; diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 676744a8e79..4e07fda1cc2 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -32,7 +32,7 @@ void registerDiskCache(DiskFactory & factory, bool global_skip_access_check); void registerDiskLocalObjectStorage(DiskFactory & factory, bool global_skip_access_check); -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerDisks(bool global_skip_access_check) { diff --git a/src/Disks/tests/gtest_azure_sdk.cpp b/src/Disks/tests/gtest_azure_sdk.cpp index 56b49ae05ae..c8867fa0a31 100644 --- a/src/Disks/tests/gtest_azure_sdk.cpp +++ b/src/Disks/tests/gtest_azure_sdk.cpp @@ -35,7 +35,7 @@ TEST(AzureBlobContainerClient, CurlMemoryLeak) options.Retry.MaxRetries = 0; auto client = std::make_unique(BlobContainerClient::CreateFromConnectionString(unavailable_url, container, options)); - EXPECT_THROW({ client->ListBlobs(); }, Azure::Core::Http::TransportException); + EXPECT_THROW({ client->ListBlobs(); }, Azure::Core::RequestFailedException); } #endif diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6e3e086859b..6481cc2d125 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -86,6 +86,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter; format_settings.custom.try_detect_header = settings.input_format_custom_detect_header; format_settings.custom.skip_trailing_empty_lines = settings.input_format_custom_skip_trailing_empty_lines; + format_settings.custom.allow_variable_number_of_columns = settings.input_format_custom_allow_variable_number_of_columns; format_settings.date_time_input_format = settings.date_time_input_format; format_settings.date_time_output_format = settings.date_time_output_format; format_settings.interval.output_format = settings.interval_output_format; @@ -115,14 +116,15 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; format_settings.json.allow_object_type = context->getSettingsRef().allow_experimental_object_type; + format_settings.json.compact_allow_variable_number_of_columns = settings.input_format_json_compact_allow_variable_number_of_columns; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size; format_settings.parquet.row_group_bytes = settings.output_format_parquet_row_group_size_bytes; format_settings.parquet.output_version = settings.output_format_parquet_version; - format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order; + format_settings.parquet.filter_push_down = settings.input_format_parquet_filter_push_down; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; 
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; @@ -130,6 +132,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types; + format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder; + format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding; + format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size; + format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size; + format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -139,12 +146,14 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers; format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers; format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference; + format_settings.protobuf.use_autogenerated_schema = settings.format_protobuf_use_autogenerated_schema; format_settings.regexp.escaping_rule = settings.format_regexp_escaping_rule; format_settings.regexp.regexp = settings.format_regexp; format_settings.regexp.skip_unmatched = settings.format_regexp_skip_unmatched; format_settings.schema.format_schema = settings.format_schema; format_settings.schema.format_schema_path = context->getFormatSchemaPath(); format_settings.schema.is_server = context->hasGlobalContext() && (context->getGlobalContext()->getApplicationType() == Context::ApplicationType::SERVER); + format_settings.schema.output_format_schema = settings.output_format_schema; format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields; format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; @@ -157,6 +166,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines; format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header; format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines; + format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns; format_settings.values.accurate_types_of_literals = 
settings.input_format_values_accurate_types_of_literals; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; @@ -164,7 +174,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.with_types_use_header = settings.input_format_with_types_use_header; format_settings.write_statistics = settings.output_format_write_statistics; format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; - format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; @@ -172,20 +181,20 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string; format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array; format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method; - format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; - format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; format_settings.orc.output_compression_method = settings.output_format_orc_compression_method; + format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; + format_settings.capn_proto.use_autogenerated_schema = settings.format_capn_proto_use_autogenerated_schema; format_settings.seekable_read = settings.input_format_allow_seeks; format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns; format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation; @@ -204,6 +213,7 
@@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.try_infer_integers = settings.input_format_try_infer_integers; format_settings.try_infer_dates = settings.input_format_try_infer_dates; format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; + format_settings.output_format_markdown_escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; format_settings.max_binary_string_size = settings.format_binary_max_string_size; @@ -434,7 +444,7 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( return format; } - return getOutputFormat(name, buf, sample, context, _format_settings); + return getOutputFormat(name, buf, sample, context, format_settings); } @@ -453,6 +463,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); + format_settings.max_threads = context->getSettingsRef().max_threads; /** TODO: Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. @@ -673,18 +684,18 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) { - auto & target = dict[name].supports_subset_of_columns; + auto & target = dict[name].subset_of_columns_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); - target = true; + target = [](const FormatSettings &){ return true; }; } -void FormatFactory::markFormatSupportsSubcolumns(const String & name) +void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker) { - auto & target = dict[name].supports_subcolumns; + auto & target = dict[name].subset_of_columns_support_checker; if (target) - throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subcolumns", name); - target = true; + throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); + target = std::move(subset_of_columns_support_checker); } void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name) @@ -695,16 +706,11 @@ void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name) target = true; } -bool FormatFactory::checkIfFormatSupportsSubcolumns(const String & name) const +bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const DB::String & name, const ContextPtr & context, const std::optional & format_settings_) const { const auto & target = getCreators(name); - return target.supports_subcolumns; -} - -bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) const -{ - const auto & target = getCreators(name); - return target.supports_subset_of_columns; + auto format_settings = format_settings_ ? 
*format_settings_ : getFormatSettings(context); + return target.subset_of_columns_support_checker && target.subset_of_columns_support_checker(format_settings); } void FormatFactory::registerAdditionalInfoForSchemaCacheGetter( diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 489db944ee6..48a1869d563 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -90,9 +90,6 @@ private: const FormatSettings & settings)>; // Incompatible with FileSegmentationEngine. - // - // In future we may also want to pass some information about WHERE conditions (SelectQueryInfo?) - // and get some information about projections (min/max/count per column per row group). using RandomAccessInputCreator = std::function; + /// Some formats can support reading subset of columns depending on settings. + /// The checker should return true if format support append. + using SubsetOfColumnsSupportChecker = std::function; + struct Creators { InputCreator input_creator; @@ -135,12 +136,11 @@ private: SchemaReaderCreator schema_reader_creator; ExternalSchemaReaderCreator external_schema_reader_creator; bool supports_parallel_formatting{false}; - bool supports_subcolumns{false}; - bool supports_subset_of_columns{false}; bool prefers_large_blocks{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; AppendSupportChecker append_support_checker; AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter; + SubsetOfColumnsSupportChecker subset_of_columns_support_checker; }; using FormatsDictionary = std::unordered_map; @@ -228,11 +228,10 @@ public: void markOutputFormatSupportsParallelFormatting(const String & name); void markOutputFormatPrefersLargeBlocks(const String & name); - void markFormatSupportsSubcolumns(const String & name); - void markFormatSupportsSubsetOfColumns(const String & name); - bool checkIfFormatSupportsSubcolumns(const String & name) const; - bool checkIfFormatSupportsSubsetOfColumns(const String & name) const; + void markFormatSupportsSubsetOfColumns(const String & name); + void registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker); + bool checkIfFormatSupportsSubsetOfColumns(const String & name, const ContextPtr & context, const std::optional & format_settings_ = std::nullopt) const; bool checkIfFormatHasSchemaReader(const String & name) const; bool checkIfFormatHasExternalSchemaReader(const String & name) const; diff --git a/src/Formats/FormatSchemaInfo.cpp b/src/Formats/FormatSchemaInfo.cpp index 8e2afe1b2c9..c0f0aec6fd3 100644 --- a/src/Formats/FormatSchemaInfo.cpp +++ b/src/Formats/FormatSchemaInfo.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include @@ -105,4 +107,84 @@ FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String { } +template +MaybeAutogeneratedFormatSchemaInfo::MaybeAutogeneratedFormatSchemaInfo( + const FormatSettings & settings, const String & format, const Block & header, bool use_autogenerated_schema) +{ + if (!use_autogenerated_schema || !settings.schema.format_schema.empty()) + { + schema_info = std::make_unique(settings, format, true); + return; + } + + String schema_path; + fs::path default_schema_directory_path(fs::canonical(settings.schema.format_schema_path) / ""); + fs::path path; + if (!settings.schema.output_format_schema.empty()) + { + schema_path = settings.schema.output_format_schema; + path = schema_path; + if (path.is_absolute()) + { + if 
(settings.schema.is_server) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Absolute path in the 'output_format_schema' setting is prohibited: {}", path.string()); + } + else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string())) + { + if (settings.schema.is_server) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})", + default_schema_directory_path.string(), + path.string(), + default_schema_directory_path.string()); + path = default_schema_directory_path / path; + } + else + { + path = default_schema_directory_path / path; + } + } + else + { + if (settings.schema.is_server) + { + tmp_file_path = PocoTemporaryFile::tempName(default_schema_directory_path.string()) + '.' + getFormatSchemaDefaultFileExtension(format); + schema_path = fs::path(tmp_file_path).filename(); + } + else + { + tmp_file_path = PocoTemporaryFile::tempName() + '.' + getFormatSchemaDefaultFileExtension(format); + schema_path = tmp_file_path; + } + + path = tmp_file_path; + } + + WriteBufferFromFile buf(path.string()); + SchemaGenerator::writeSchema(buf, "Message", header.getNamesAndTypesList()); + buf.finalize(); + + schema_info = std::make_unique(schema_path + ":Message", format, true, settings.schema.is_server, settings.schema.format_schema_path); +} + +template +MaybeAutogeneratedFormatSchemaInfo::~MaybeAutogeneratedFormatSchemaInfo() +{ + if (!tmp_file_path.empty()) + { + try + { + fs::remove(tmp_file_path); + } + catch (...) + { + tryLogCurrentException("MaybeAutogeneratedFormatSchemaInfo", "Cannot delete temporary schema file"); + } + } +} + +template class MaybeAutogeneratedFormatSchemaInfo; +template class MaybeAutogeneratedFormatSchemaInfo; + } diff --git a/src/Formats/FormatSchemaInfo.h b/src/Formats/FormatSchemaInfo.h index 8c430218af0..e8758c3f761 100644 --- a/src/Formats/FormatSchemaInfo.h +++ b/src/Formats/FormatSchemaInfo.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -30,4 +32,23 @@ private: String message_name; }; + +template +class MaybeAutogeneratedFormatSchemaInfo +{ +public: + MaybeAutogeneratedFormatSchemaInfo(const FormatSettings & settings, const String & format, const Block & header, bool use_autogenerated_schema); + + ~MaybeAutogeneratedFormatSchemaInfo(); + + const FormatSchemaInfo & getSchemaInfo() const { return *schema_info; } +private: + + std::unique_ptr schema_info; + String tmp_file_path; +}; + +using CapnProtoSchemaInfo = MaybeAutogeneratedFormatSchemaInfo; +using ProtobufSchemaInfo = MaybeAutogeneratedFormatSchemaInfo; + } diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index e321e5264ca..c2744f68088 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -46,6 +46,8 @@ struct FormatSettings bool try_infer_dates = false; bool try_infer_datetimes = false; + bool output_format_markdown_escape_special_characters = false; + enum class DateTimeInputFormat { Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. 
@@ -100,6 +102,8 @@ struct FormatSettings UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; + size_t max_threads = 1; + enum class ArrowCompression { NONE, @@ -111,7 +115,6 @@ struct FormatSettings { UInt64 row_group_size = 1000000; bool low_cardinality_as_dictionary = false; - bool import_nested = false; bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; @@ -174,6 +177,7 @@ struct FormatSettings EscapingRule escaping_rule = EscapingRule::Escaped; bool try_detect_header = true; bool skip_trailing_empty_lines = false; + bool allow_variable_number_of_columns = false; } custom; struct @@ -196,6 +200,7 @@ struct FormatSettings bool validate_types_from_metadata = true; bool validate_utf8 = false; bool allow_object_type = false; + bool compact_allow_variable_number_of_columns = false; } json; struct @@ -225,18 +230,23 @@ struct FormatSettings { UInt64 row_group_rows = 1000000; UInt64 row_group_bytes = 512 * 1024 * 1024; - bool import_nested = false; bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; + bool filter_push_down = true; std::unordered_set skip_row_groups = {}; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; bool preserve_order = false; + bool use_custom_encoder = true; + bool parallel_encoding = true; UInt64 max_block_size = 8192; ParquetVersion output_version; ParquetCompression output_compression_method = ParquetCompression::SNAPPY; bool output_compliant_nested_types = true; + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; + size_t local_read_min_bytes_for_seek = 8192; } parquet; struct Pretty @@ -270,6 +280,7 @@ struct FormatSettings */ bool allow_multiple_rows_without_delimiter = false; bool skip_fields_with_unsupported_types_in_schema_inference = false; + bool use_autogenerated_schema = true; } protobuf; struct @@ -291,6 +302,7 @@ struct FormatSettings std::string format_schema; std::string format_schema_path; bool is_server = false; + std::string output_format_schema; } schema; struct @@ -310,6 +322,7 @@ struct FormatSettings UInt64 skip_first_lines = 0; bool try_detect_header = true; bool skip_trailing_empty_lines = false; + bool allow_variable_number_of_columns = false; } tsv; struct @@ -330,7 +343,6 @@ struct FormatSettings struct { - bool import_nested = false; bool allow_missing_columns = false; int64_t row_batch_size = 100'000; bool skip_columns_with_unsupported_types_in_schema_inference = false; @@ -338,6 +350,7 @@ struct FormatSettings std::unordered_set skip_stripes = {}; bool output_string_as_string = false; ORCCompression output_compression_method = ORCCompression::NONE; + bool use_fast_decoder = true; } orc; /// For capnProto format we should determine how to @@ -353,6 +366,7 @@ struct FormatSettings { CapnProtoEnumComparingMode enum_comparing_mode = CapnProtoEnumComparingMode::BY_VALUES; bool skip_fields_with_unsupported_types_in_schema_inference = false; + bool use_autogenerated_schema = true; } capn_proto; enum class MsgPackUUIDRepresentation diff --git a/src/Formats/IndexForNativeFormat.cpp b/src/Formats/IndexForNativeFormat.cpp index 91ae1e39280..bb410125378 100644 --- a/src/Formats/IndexForNativeFormat.cpp +++ b/src/Formats/IndexForNativeFormat.cpp @@ -20,8 +20,8 @@ void IndexOfBlockForNativeFormat::read(ReadBuffer & istr) auto & column = columns.emplace_back(); readBinary(column.name, 
istr); readBinary(column.type, istr); - readBinary(column.location.offset_in_compressed_file, istr); - readBinary(column.location.offset_in_decompressed_block, istr); + readBinaryLittleEndian(column.location.offset_in_compressed_file, istr); + readBinaryLittleEndian(column.location.offset_in_decompressed_block, istr); } } @@ -34,8 +34,8 @@ void IndexOfBlockForNativeFormat::write(WriteBuffer & ostr) const const auto & column = columns[i]; writeBinary(column.name, ostr); writeBinary(column.type, ostr); - writeBinary(column.location.offset_in_compressed_file, ostr); - writeBinary(column.location.offset_in_decompressed_block, ostr); + writeBinaryLittleEndian(column.location.offset_in_compressed_file, ostr); + writeBinaryLittleEndian(column.location.offset_in_decompressed_block, ostr); } } diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 0aac72c68fe..6fbda869154 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -126,6 +126,86 @@ namespace JSONUtils return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows); } + template + void skipRowForJSONEachRowImpl(ReadBuffer & in) + { + size_t balance = 0; + bool quotes = false; + while (!in.eof()) + { + if (quotes) + { + auto * pos = find_first_symbols<'\\', '"'>(in.position(), in.buffer().end()); + in.position() = pos; + + if (in.position() > in.buffer().end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); + else if (in.position() == in.buffer().end()) + continue; + + if (*in.position() == '\\') + { + ++in.position(); + if (!in.eof()) + ++in.position(); + } + else if (*in.position() == '"') + { + ++in.position(); + quotes = false; + } + } + else + { + auto * pos = find_first_symbols(in.position(), in.buffer().end()); + in.position() = pos; + + if (in.position() > in.buffer().end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. 
There must be a bug."); + else if (in.position() == in.buffer().end()) + continue; + + else if (*in.position() == opening_bracket) + { + ++balance; + ++in.position(); + } + else if (*in.position() == closing_bracket) + { + --balance; + ++in.position(); + } + else if (*in.position() == '\\') + { + ++in.position(); + if (!in.eof()) + ++in.position(); + } + else if (*in.position() == '"') + { + quotes = true; + ++in.position(); + } + + if (balance == 0) + return; + } + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected eof"); + + } + + void skipRowForJSONEachRow(ReadBuffer & in) + { + return skipRowForJSONEachRowImpl<'{', '}'>(in); + } + + void skipRowForJSONCompactEachRow(ReadBuffer & in) + { + return skipRowForJSONEachRowImpl<'[', ']'>(in); + } + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info) { skipWhitespaceIfAny(in); @@ -612,8 +692,11 @@ namespace JSONUtils auto names_and_types = JSONUtils::readMetadata(in); for (const auto & [name, type] : names_and_types) { + if (!header.has(name)) + continue; + auto header_type = header.getByName(name).type; - if (header.has(name) && !type->equals(*header_type)) + if (!type->equals(*header_type)) throw Exception( ErrorCodes::INCORRECT_DATA, "Type {} of column '{}' from metadata is not the same as type in header {}", diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index fd1ba7db980..bd56eb646cb 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -20,6 +20,9 @@ namespace JSONUtils std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows); std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows); + void skipRowForJSONEachRow(ReadBuffer & in); + void skipRowForJSONCompactEachRow(ReadBuffer & in); + /// Read row in JSONEachRow format and try to determine type for each field. /// Return list of names and types. /// If cannot determine the type of some field, return nullptr for it. diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 8100a3868e6..70d5b7914a7 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -135,9 +135,19 @@ size_t NativeWriter::write(const Block & block) if (client_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) { auto info = column.type->getSerializationInfo(*column.column); - serialization = column.type->getSerialization(*info); + bool has_custom = false; + + if (client_revision >= DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION) + { + serialization = column.type->getSerialization(*info); + has_custom = info->hasCustomSerialization(); + } + else + { + serialization = column.type->getDefaultSerialization(); + column.column = recursiveRemoveSparse(column.column); + } - bool has_custom = info->hasCustomSerialization(); writeBinary(static_cast(has_custom), ostr); if (has_custom) info->serialializeKindBinary(ostr); diff --git a/src/Formats/ProtobufReader.cpp b/src/Formats/ProtobufReader.cpp index 159a1d33c49..577342bf299 100644 --- a/src/Formats/ProtobufReader.cpp +++ b/src/Formats/ProtobufReader.cpp @@ -429,12 +429,10 @@ void ProtobufReader::ignoreGroup() [[noreturn]] void ProtobufReader::throwUnknownFormat() const { - throw Exception::createDeprecated( - std::string("Protobuf messages are corrupted or don't match the provided schema.") - + (root_message_has_length_delimiter - ? 
" Please note that Protobuf stream is length-delimited: every message is prefixed by its length in varint." - : ""), - ErrorCodes::UNKNOWN_PROTOBUF_FORMAT); + throw Exception(ErrorCodes::UNKNOWN_PROTOBUF_FORMAT, "Protobuf messages are corrupted or don't match the provided schema.{}", + root_message_has_length_delimiter + ? " Please note that Protobuf stream is length-delimited: every message is prefixed by its length in varint." + : ""); } } diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index f690800d145..dd37c25719c 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -3029,7 +3029,7 @@ namespace if (!message_serializer) { throw Exception(ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS, - "Not found matches between the names of the columns {{}} and the fields {{}} of the message {} in the protobuf schema", + "Not found matches between the names of the columns ({}) and the fields ({}) of the message {} in the protobuf schema", boost::algorithm::join(column_names, ", "), boost::algorithm::join(getFieldNames(message_descriptor), ", "), quoteString(message_descriptor.full_name())); } @@ -3647,7 +3647,7 @@ namespace if (!message_serializer) { throw Exception(ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS, - "Not found matches between the names of the tuple's elements {{}} and the fields {{}} " + "Not found matches between the names of the tuple's elements ({}) and the fields ({}) " "of the message {} in the protobuf schema", boost::algorithm::join(tuple_data_type.getElementNames(), ", "), boost::algorithm::join(getFieldNames(*field_descriptor.message_type()), ", "), diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index cbfa05bfba6..3158798fdca 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -1,13 +1,11 @@ -#include -#include #include -#include -#include #include #include #include #include #include +#include + namespace DB { @@ -49,10 +47,11 @@ bool isRetryableSchemaInferenceError(int code) ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, - ReadBufferIterator & read_buffer_iterator, + IReadBufferIterator & read_buffer_iterator, bool retry, ContextPtr & context, std::unique_ptr & buf) +try { NamesAndTypesList names_and_types; if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name)) @@ -78,13 +77,13 @@ ColumnsDescription readSchemaFromFormat( size_t max_bytes_to_read = format_settings ? format_settings->max_bytes_to_read_for_schema_inference : context->getSettingsRef().input_format_max_bytes_to_read_for_schema_inference; size_t iterations = 0; - ColumnsDescription cached_columns; while (true) { bool is_eof = false; try { - buf = read_buffer_iterator(cached_columns); + read_buffer_iterator.setPreviousReadBuffer(std::move(buf)); + buf = read_buffer_iterator.next(); if (!buf) break; is_eof = buf->eof(); @@ -124,6 +123,9 @@ ColumnsDescription readSchemaFromFormat( schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings); schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); names_and_types = schema_reader->readSchema(); + auto num_rows = schema_reader->readNumberOrRows(); + if (num_rows) + read_buffer_iterator.setNumRowsToLastFile(*num_rows); break; } catch (...) 
@@ -178,8 +180,8 @@ ColumnsDescription readSchemaFromFormat( } } - if (!cached_columns.empty()) - return cached_columns; + if (auto cached_columns = read_buffer_iterator.getCachedColumns()) + return *cached_columns; if (names_and_types.empty()) throw Exception( @@ -209,17 +211,28 @@ ColumnsDescription readSchemaFromFormat( ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference. You must specify the structure manually", format_name); + /// Some formats like CSVWithNames can contain empty column names. We don't support empty column names and further processing can fail with an exception. Let's just remove columns with empty names from the structure. names_and_types.erase( std::remove_if(names_and_types.begin(), names_and_types.end(), [](const NameAndTypePair & pair) { return pair.name.empty(); }), names_and_types.end()); return ColumnsDescription(names_and_types); } +catch (Exception & e) +{ + if (!buf) + throw; + auto file_name = getFileNameFromReadBuffer(*buf); + if (!file_name.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_name)); + throw; +} + ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, - ReadBufferIterator & read_buffer_iterator, + IReadBufferIterator & read_buffer_iterator, bool retry, ContextPtr & context) { diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index 6b4c78a4ff6..c769846acbb 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -1,15 +1,46 @@ #pragma once -#include -#include #include +#include +#include namespace DB { -using ReadBufferIterator = std::function(ColumnsDescription &)>; +struct IReadBufferIterator +{ + virtual ~IReadBufferIterator() = default; -/// Try to determine the schema of the data in the specified format. + virtual void setPreviousReadBuffer(std::unique_ptr /* buffer */) {} + + virtual std::unique_ptr next() = 0; + + virtual std::optional getCachedColumns() { return std::nullopt; } + + virtual void setNumRowsToLastFile(size_t /*num_rows*/) {} +}; + +struct SingleReadBufferIterator : public IReadBufferIterator +{ +public: + explicit SingleReadBufferIterator(std::unique_ptr buf_) : buf(std::move(buf_)) + { + } + + std::unique_ptr next() override + { + if (done) + return nullptr; + done = true; + return std::move(buf); + } + +private: + std::unique_ptr buf; + bool done = false; +}; + +/// Try to determine the schema of the data and number of rows in data in the specified format. /// For formats that have an external schema reader, it will /// use it and won't create a read buffer. 
/// For formats that have a schema reader from the data, @@ -22,7 +53,7 @@ using ReadBufferIterator = std::function(ColumnsDesc ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, - ReadBufferIterator & read_buffer_iterator, + IReadBufferIterator & read_buffer_iterator, bool retry, ContextPtr & context); @@ -30,12 +61,12 @@ ColumnsDescription readSchemaFromFormat( ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, - ReadBufferIterator & read_buffer_iterator, + IReadBufferIterator & read_buffer_iterator, bool retry, ContextPtr & context, std::unique_ptr & buf_out); -SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context); -SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional & format_settings, const ContextPtr & context); +SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context); +SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional & format_settings, const ContextPtr & context); } diff --git a/src/Formats/StructureToCapnProtoSchema.cpp b/src/Formats/StructureToCapnProtoSchema.cpp new file mode 100644 index 00000000000..9f4d96b7c8a --- /dev/null +++ b/src/Formats/StructureToCapnProtoSchema.cpp @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +using namespace StructureToFormatSchemaUtils; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +const std::unordered_map capn_proto_simple_type_names = +{ + {TypeIndex::Int8, "Int8"}, + {TypeIndex::UInt8, "UInt8"}, + {TypeIndex::Int16, "Int16"}, + {TypeIndex::UInt16, "UInt16"}, + {TypeIndex::Int32, "Int32"}, + {TypeIndex::UInt32, "UInt32"}, + {TypeIndex::Int64, "Int64"}, + {TypeIndex::UInt64, "UInt64"}, + {TypeIndex::Int128, "Data"}, + {TypeIndex::UInt128, "Data"}, + {TypeIndex::Int256, "Data"}, + {TypeIndex::UInt256, "Data"}, + {TypeIndex::Float32, "Float32"}, + {TypeIndex::Float64, "Float64"}, + {TypeIndex::Decimal32, "Int32"}, + {TypeIndex::Decimal64, "Int64"}, + {TypeIndex::Decimal128, "Data"}, + {TypeIndex::Decimal256, "Data"}, + {TypeIndex::String, "Data"}, + {TypeIndex::FixedString, "Data"}, + {TypeIndex::UUID, "Data"}, + {TypeIndex::Date, "UInt16"}, + {TypeIndex::Date32, "Int32"}, + {TypeIndex::DateTime, "UInt32"}, + {TypeIndex::DateTime64, "Int64"}, + {TypeIndex::IPv4, "UInt32"}, + {TypeIndex::IPv6, "Data"}, +}; + +void writeCapnProtoHeader(WriteBuffer & buf) +{ + pcg64 rng(randomSeed()); + size_t id = rng() | (1ull << 63); /// First bit should be 1 + writeString(fmt::format("@0x{};\n\n", getHexUIntLowercase(id)), buf); +} + +void writeFieldDefinition(WriteBuffer & buf, const String & type_name, const String & column_name, size_t & field_index, size_t indent) +{ + writeIndent(buf, indent); + writeString(fmt::format("{} @{} : {};\n", getSchemaFieldName(column_name), field_index++, type_name), buf); +} + +void startEnum(WriteBuffer & buf, const String & enum_name, size_t indent) +{ + startNested(buf, enum_name, "enum", indent); +} + +void startUnion(WriteBuffer & buf, size_t indent) +{ + startNested(buf, "", "union", indent); +} + +void startStruct(WriteBuffer & buf, const String & struct_name, 
size_t indent) +{ + startNested(buf, struct_name, "struct", indent); +} + +String prepareAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent); + +void writeField(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t & field_index, size_t indent) +{ + auto field_type_name = prepareAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + writeFieldDefinition(buf, field_type_name, column_name, field_index, indent); +} + +String prepareArrayAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & nested_type = assert_cast(*data_type).getNestedType(); + auto nested_type_name = prepareAndGetCapnProtoTypeName(buf, nested_type, column_name, indent); + return "List(" + nested_type_name + ")"; +} + +String prepareNullableAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + /// Nullable is represented as a struct with union with 2 fields: + /// + /// struct Nullable + /// { + /// union + /// { + /// value @0 : Value; + /// null @1 : Void; + /// } + /// } + auto struct_name = getSchemaMessageName(column_name); + startStruct(buf, struct_name, indent); + auto nested_type_name = prepareAndGetCapnProtoTypeName(buf, assert_cast(*data_type).getNestedType(), column_name, indent); + startUnion(buf, indent + 1); + size_t field_index = 0; + writeFieldDefinition(buf, nested_type_name, "value", field_index, indent + 2); + writeFieldDefinition(buf, "Void", "null", field_index, indent + 2); + endNested(buf, indent + 1); + endNested(buf, indent); + return struct_name; +} + +String prepareTupleAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & tuple_type = assert_cast(*data_type); + auto nested_names_and_types = getCollectedTupleElements(tuple_type); + + String struct_name = getSchemaMessageName(column_name); + startStruct(buf, struct_name, indent); + size_t nested_field_index = 0; + for (const auto & [name, type] : nested_names_and_types) + writeField(buf, type, name, nested_field_index, indent + 1); + endNested(buf, indent); + return struct_name; +} + +String prepareMapAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + /// We output/input Map type as follow CapnProto schema + /// + /// struct Map + /// { + /// struct Entry + /// { + /// key @0: Key; + /// value @1: Value; + /// } + /// entries @0 :List(Entry); + /// } + const auto & map_type = assert_cast(*data_type); + const auto & key_type = map_type.getKeyType(); + const auto & value_type = map_type.getValueType(); + + String struct_name = getSchemaMessageName(column_name); + startStruct(buf, struct_name, indent); + startStruct(buf, "Entry", indent + 1); + auto key_type_name = prepareAndGetCapnProtoTypeName(buf, key_type, "key", indent + 2); + auto value_type_name = prepareAndGetCapnProtoTypeName(buf, value_type, "value", indent + 2); + size_t field_index = 0; + writeFieldDefinition(buf, key_type_name, "key", field_index, indent + 2); + writeFieldDefinition(buf, value_type_name, "value", field_index, indent + 2); + endNested(buf, indent + 1); + field_index = 0; + writeFieldDefinition(buf, "List(Entry)", "entries", field_index, indent + 1); + endNested(buf, indent); + return struct_name; +} + +template +String prepareEnumAndGetCapnProtoTypeName(WriteBuffer & buf, 
const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & enum_type = assert_cast &>(*data_type); + String enum_name = getSchemaMessageName(column_name); + startEnum(buf, enum_name, indent); + const auto & names = enum_type.getAllRegisteredNames(); + for (size_t i = 0; i != names.size(); ++i) + { + writeIndent(buf, indent + 1); + writeString(fmt::format("{} @{};\n", names[i], std::to_string(i)), buf); + } + endNested(buf, indent); + return enum_name; +} + +String prepareAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + TypeIndex type_id = data_type->getTypeId(); + + switch (data_type->getTypeId()) + { + case TypeIndex::Nullable: + return prepareNullableAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + case TypeIndex::LowCardinality: + return prepareAndGetCapnProtoTypeName(buf, assert_cast(*data_type).getDictionaryType(), column_name, indent); + case TypeIndex::Array: + return prepareArrayAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + case TypeIndex::Tuple: + return prepareTupleAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + case TypeIndex::Map: + return prepareMapAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + case TypeIndex::Enum8: + return prepareEnumAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + case TypeIndex::Enum16: + return prepareEnumAndGetCapnProtoTypeName(buf, data_type, column_name, indent); + default: + { + if (isBool(data_type)) + return "Bool"; + + auto it = capn_proto_simple_type_names.find(type_id); + if (it == capn_proto_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "CapnProto type name is not found for type {}", data_type->getName()); + return it->second; + } + } +} + +} + +void StructureToCapnProtoSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_) +{ + auto names_and_types = collectNested(names_and_types_); + writeCapnProtoHeader(buf); + startStruct(buf, getSchemaMessageName(message_name), 0); + + size_t field_index = 0; + for (const auto & [column_name, data_type] : names_and_types) + writeField(buf, data_type, column_name, field_index, 1); + + endNested(buf, 0); +} + +} diff --git a/src/Formats/StructureToCapnProtoSchema.h b/src/Formats/StructureToCapnProtoSchema.h new file mode 100644 index 00000000000..b2a0a8a8cf9 --- /dev/null +++ b/src/Formats/StructureToCapnProtoSchema.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +struct StructureToCapnProtoSchema +{ + static constexpr auto name = "structureToCapnProtoSchema"; + + static void writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_); +}; + +} diff --git a/src/Formats/StructureToFormatSchemaUtils.cpp b/src/Formats/StructureToFormatSchemaUtils.cpp new file mode 100644 index 00000000000..a9374647ebc --- /dev/null +++ b/src/Formats/StructureToFormatSchemaUtils.cpp @@ -0,0 +1,117 @@ +#include +#include + +namespace DB +{ + +namespace StructureToFormatSchemaUtils +{ + +void writeIndent(WriteBuffer & buf, size_t indent) +{ + writeChar(' ', indent * 4, buf); +} + +void startNested(WriteBuffer & buf, const String & nested_name, const String & nested_type, size_t indent) +{ + writeIndent(buf, indent); + writeString(nested_type, buf); + if (!nested_name.empty()) + { + writeChar(' ', buf); + writeString(nested_name, buf); + } + writeChar('\n', buf); + writeIndent(buf, indent); + 
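+    /// The opening brace goes on its own line, aligned with the "struct"/"message"/"enum" header written above.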
writeCString("{\n", buf); +} + +void endNested(WriteBuffer & buf, size_t indent) +{ + writeIndent(buf, indent); + writeCString("}\n", buf); +} + +String getSchemaFieldName(const String & column_name) +{ + String result = column_name; + /// Replace all first uppercase letters to lower-case, + /// because fields in CapnProto schema must begin with a lower-case letter. + /// Don't replace all letters to lower-case to remain camelCase field names. + for (auto & symbol : result) + { + if (islower(symbol)) + break; + symbol = tolower(symbol); + } + return result; +} + +String getSchemaMessageName(const String & column_name) +{ + String result = column_name; + if (!column_name.empty() && isalpha(column_name[0])) + result[0] = toupper(column_name[0]); + return result; +} + +namespace +{ + std::pair splitName(const String & name) + { + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {std::move(first), std::move(second)}; + } +} + +NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types) +{ + /// Find all columns with dots '.' or underscores '_' and move them into a tuple. + /// For example if we have columns 'a.b UInt32, a.c UInt32, x_y String' we will + /// change it to 'a Tuple(b UInt32, c UInt32), x Tuple(y String)' + NamesAndTypesList result; + std::unordered_map nested; + for (const auto & [name, type] : names_and_types) + { + auto [field_name, nested_name] = splitName(name); + if (nested_name.empty()) + result.emplace_back(name, type); + else + nested[field_name].emplace_back(nested_name, type); + } + + for (const auto & [field_name, elements]: nested) + result.emplace_back(field_name, std::make_shared(elements.getTypes(), elements.getNames())); + + return result; +} + +NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type) +{ + const auto & nested_types = tuple_type.getElements(); + Names nested_names; + if (tuple_type.haveExplicitNames()) + { + nested_names = tuple_type.getElementNames(); + } + else + { + nested_names.reserve(nested_types.size()); + for (size_t i = 0; i != nested_types.size(); ++i) + nested_names.push_back("e" + std::to_string(i + 1)); + } + + NamesAndTypesList result; + for (size_t i = 0; i != nested_names.size(); ++i) + result.emplace_back(nested_names[i], nested_types[i]); + + return collectNested(result); +} + +} + +} diff --git a/src/Formats/StructureToFormatSchemaUtils.h b/src/Formats/StructureToFormatSchemaUtils.h new file mode 100644 index 00000000000..c6b86501ac8 --- /dev/null +++ b/src/Formats/StructureToFormatSchemaUtils.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +namespace StructureToFormatSchemaUtils +{ + void writeIndent(WriteBuffer & buf, size_t indent); + + void startNested(WriteBuffer & buf, const String & nested_name, const String & nested_type, size_t indent); + + void endNested(WriteBuffer & buf, size_t indent); + + String getSchemaFieldName(const String & column_name); + + String getSchemaMessageName(const String & column_name); + + NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types); + + NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type); +} + +} diff --git a/src/Formats/StructureToProtobufSchema.cpp b/src/Formats/StructureToProtobufSchema.cpp new file mode 100644 index 00000000000..4a704e8d428 --- /dev/null +++ 
b/src/Formats/StructureToProtobufSchema.cpp @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +using namespace StructureToFormatSchemaUtils; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +const std::unordered_map protobuf_simple_type_names = +{ + {TypeIndex::Int8, "int32"}, + {TypeIndex::UInt8, "uint32"}, + {TypeIndex::Int16, "int32"}, + {TypeIndex::UInt16, "uint32"}, + {TypeIndex::Int32, "int32"}, + {TypeIndex::UInt32, "uint32"}, + {TypeIndex::Int64, "int64"}, + {TypeIndex::UInt64, "uint64"}, + {TypeIndex::Int128, "bytes"}, + {TypeIndex::UInt128, "bytes"}, + {TypeIndex::Int256, "bytes"}, + {TypeIndex::UInt256, "bytes"}, + {TypeIndex::Float32, "float"}, + {TypeIndex::Float64, "double"}, + {TypeIndex::Decimal32, "bytes"}, + {TypeIndex::Decimal64, "bytes"}, + {TypeIndex::Decimal128, "bytes"}, + {TypeIndex::Decimal256, "bytes"}, + {TypeIndex::String, "bytes"}, + {TypeIndex::FixedString, "bytes"}, + {TypeIndex::UUID, "bytes"}, + {TypeIndex::Date, "uint32"}, + {TypeIndex::Date32, "int32"}, + {TypeIndex::DateTime, "uint32"}, + {TypeIndex::DateTime64, "uint64"}, + {TypeIndex::IPv4, "uint32"}, + {TypeIndex::IPv6, "bytes"}, +}; + +void writeProtobufHeader(WriteBuffer & buf) +{ + writeCString("syntax = \"proto3\";\n\n", buf); +} + +void startEnum(WriteBuffer & buf, const String & enum_name, size_t indent) +{ + startNested(buf, enum_name, "enum", indent); +} + +void startMessage(WriteBuffer & buf, const String & message_name, size_t indent) +{ + startNested(buf, message_name, "message", indent); +} + +void writeFieldDefinition(WriteBuffer & buf, const String & type_name, const String & column_name, size_t & field_index, size_t indent) +{ + writeIndent(buf, indent); + writeString(fmt::format("{} {} = {};\n", type_name, getSchemaFieldName(column_name), field_index++), buf); +} + +String prepareAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent); + +void writeProtobufField(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t & field_index, size_t indent) +{ + auto field_type_name = prepareAndGetProtobufTypeName(buf, data_type, column_name, indent); + writeFieldDefinition(buf, field_type_name, column_name, field_index, indent); +} + +String prepareArrayAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & nested_type = assert_cast(*data_type).getNestedType(); + /// Simple case when we can just use 'repeated '. + if (!isArray(nested_type) && !isMap(nested_type)) + { + auto nested_type_name = prepareAndGetProtobufTypeName(buf, nested_type, column_name, indent); + return "repeated " + nested_type_name; + } + + /// Protobuf doesn't support multidimensional repeated fields and repeated maps. + /// When we have Array(Array(...)) or Array(Map(...)) we should place nested type into a nested Message with one field. 
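+    /// For illustration (a sketch of the expected output, not verbatim): a column
+    /// 'arr Array(Array(UInt32))' would produce a nested message roughly like
+    ///     message Arr
+    ///     {
+    ///         repeated uint32 arr = 1;
+    ///     }
+    /// and the outer field itself is then declared as 'repeated Arr'.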
+ String message_name = getSchemaMessageName(column_name); + startMessage(buf, message_name, indent); + size_t nested_field_index = 1; + writeProtobufField(buf, nested_type, column_name, nested_field_index, indent + 1); + endNested(buf, indent); + return "repeated " + message_name; +} + +String prepareTupleAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & tuple_type = assert_cast(*data_type); + auto nested_names_and_types = getCollectedTupleElements(tuple_type); + + String message_name = getSchemaMessageName(column_name); + startMessage(buf, message_name, indent); + size_t nested_field_index = 1; + for (const auto & [name, type] : nested_names_and_types) + writeProtobufField(buf, type, name, nested_field_index, indent + 1); + endNested(buf, indent); + return message_name; +} + +String prepareMapAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & map_type = assert_cast(*data_type); + const auto & key_type = map_type.getKeyType(); + const auto & value_type = map_type.getValueType(); + auto it = protobuf_simple_type_names.find(key_type->getTypeId()); + if (it == protobuf_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type {} is not supported for conversion into Map key in Protobuf schema", data_type->getName()); + auto key_type_name = it->second; + /// Protobuf map type doesn't support "bytes" type as a key. Change it to "string" + if (key_type_name == "bytes") + key_type_name = "string"; + + /// Special cases when value type is Array or Map, because Protobuf + /// doesn't support syntax "map" and "map>" + /// In this case we should place it into a nested Message with one field. 
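+    /// For illustration (a sketch, not verbatim output): a column 'm Map(String, Array(UInt32))'
+    /// would produce a helper message roughly like
+    ///     message MValue
+    ///     {
+    ///         repeated uint32 mValue = 1;
+    ///     }
+    /// and the field type then becomes 'map<string, MValue>'.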
+ String value_type_name; + if (isArray(value_type) || isMap(value_type)) + { + value_type_name = getSchemaMessageName(column_name) + "Value"; + startMessage(buf, value_type_name, indent); + size_t nested_field_index = 1; + writeProtobufField(buf, value_type, column_name + "Value", nested_field_index, indent + 1); + endNested(buf, indent); + } + else + { + value_type_name = prepareAndGetProtobufTypeName(buf, value_type, column_name + "Value", indent); + } + + return fmt::format("map<{}, {}>", key_type_name, value_type_name); +} + +template +String prepareEnumAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + const auto & enum_type = assert_cast &>(*data_type); + String enum_name = getSchemaMessageName(column_name); + startEnum(buf, enum_name, indent); + const auto & names = enum_type.getAllRegisteredNames(); + for (size_t i = 0; i != names.size(); ++i) + { + writeIndent(buf, indent + 1); + writeString(fmt::format("{} = {};\n", names[i], std::to_string(i)), buf); + } + endNested(buf, indent); + return enum_name; +} + +String prepareAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent) +{ + TypeIndex type_id = data_type->getTypeId(); + + switch (data_type->getTypeId()) + { + case TypeIndex::Nullable: + return prepareAndGetProtobufTypeName(buf, assert_cast(*data_type).getNestedType(), column_name, indent); + case TypeIndex::LowCardinality: + return prepareAndGetProtobufTypeName(buf, assert_cast(*data_type).getDictionaryType(), column_name, indent); + case TypeIndex::Array: + return prepareArrayAndGetProtobufTypeName(buf, data_type, column_name, indent); + case TypeIndex::Tuple: + return prepareTupleAndGetProtobufTypeName(buf, data_type, column_name, indent); + case TypeIndex::Map: + return prepareMapAndGetProtobufTypeName(buf, data_type, column_name, indent); + case TypeIndex::Enum8: + return prepareEnumAndGetProtobufTypeName(buf, data_type, column_name, indent); + case TypeIndex::Enum16: + return prepareEnumAndGetProtobufTypeName(buf, data_type, column_name, indent); + default: + { + if (isBool(data_type)) + return "bool"; + + auto it = protobuf_simple_type_names.find(type_id); + if (it == protobuf_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type {} is not supported for conversion into Protobuf schema", data_type->getName()); + return it->second; + } + } +} + +} + +void StructureToProtobufSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_) +{ + auto names_and_types = collectNested(names_and_types_); + writeProtobufHeader(buf); + startMessage(buf, getSchemaMessageName(message_name), 0); + size_t field_index = 1; + for (const auto & [column_name, data_type] : names_and_types) + writeProtobufField(buf, data_type, column_name, field_index, 1); + endNested(buf, 0); +} + +} diff --git a/src/Formats/StructureToProtobufSchema.h b/src/Formats/StructureToProtobufSchema.h new file mode 100644 index 00000000000..f4dfb0ae0c2 --- /dev/null +++ b/src/Formats/StructureToProtobufSchema.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +struct StructureToProtobufSchema +{ + static constexpr auto name = "structureToProtobufSchema"; + + static void writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_); +}; + +} diff --git a/src/Formats/fuzzers/CMakeLists.txt b/src/Formats/fuzzers/CMakeLists.txt index 
984823f3360..38009aeec1d 100644 --- a/src/Formats/fuzzers/CMakeLists.txt +++ b/src/Formats/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS}) -target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions ${LIB_FUZZING_ENGINE}) +target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions) diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index e84d0913d0d..583d1173a01 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -22,112 +22,113 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - using namespace DB; - - static SharedContextHolder shared_context; - static ContextMutablePtr context; - - auto initialize = [&]() mutable + try { - shared_context = Context::createShared(); - context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - context->setApplicationType(Context::ApplicationType::LOCAL); + using namespace DB; - MainThreadStatus::getInstance(); + static SharedContextHolder shared_context; + static ContextMutablePtr context; - registerAggregateFunctions(); - registerFormats(); + auto initialize = [&]() mutable + { + shared_context = Context::createShared(); + context = Context::createGlobal(shared_context.get()); + context->makeGlobalContext(); + context->setApplicationType(Context::ApplicationType::LOCAL); - return true; - }; + MainThreadStatus::getInstance(); - static bool initialized = initialize(); - (void) initialized; + registerAggregateFunctions(); + registerFormats(); - total_memory_tracker.resetCounters(); - total_memory_tracker.setHardLimit(1_GiB); - CurrentThread::get().memory_tracker.resetCounters(); - CurrentThread::get().memory_tracker.setHardLimit(1_GiB); + return true; + }; - /// The input format is as follows: - /// - format name on the first line, - /// - table structure on the second line, - /// - the data for the rest of the input. + static bool initialized = initialize(); + (void) initialized; - /** The corpus was generated as follows: + total_memory_tracker.resetCounters(); + total_memory_tracker.setHardLimit(1_GiB); + CurrentThread::get().memory_tracker.resetCounters(); + CurrentThread::get().memory_tracker.setHardLimit(1_GiB); - i=0; find ../../../../tests/queries -name '*.sql' | - xargs -I{} bash -c "tr '\n' ' ' <{}; echo" | - rg -o -i 'CREATE TABLE\s+\w+\s+\(.+?\) ENGINE' | - sed -r -e 's/CREATE TABLE\s+\w+\s+\((.+?)\) ENGINE/\1/i' | sort | uniq | - while read line; do - i=$((i+1)); - clickhouse-local --query "SELECT name FROM system.formats ORDER BY rand() LIMIT 1" >> $i; - echo "$line" >> $i; - echo $RANDOM >> $i; - echo $i; + /// The input format is as follows: + /// - format name on the first line, + /// - table structure on the second line, + /// - the data for the rest of the input. 
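+    /// An illustrative input (made up for this comment, not taken from the corpus):
+    ///     TSV
+    ///     x UInt64, s String
+    ///     1\thello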
+ + /** The corpus was generated as follows: + + i=0; find ../../../../tests/queries -name '*.sql' | + xargs -I{} bash -c "tr '\n' ' ' <{}; echo" | + rg -o -i 'CREATE TABLE\s+\w+\s+\(.+?\) ENGINE' | + sed -r -e 's/CREATE TABLE\s+\w+\s+\((.+?)\) ENGINE/\1/i' | sort | uniq | + while read line; do + i=$((i+1)); + clickhouse-local --query "SELECT name FROM system.formats ORDER BY rand() LIMIT 1" >> $i; + echo "$line" >> $i; + echo $RANDOM >> $i; + echo $i; + done + */ + + /** And: + + for format in $(clickhouse-client --query "SELECT name FROM system.formats WHERE is_output"); do + echo $format; + echo $format >> $format; + echo "WatchID Int64, JavaEnable Int16, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID Int32, ClientIP Int32, RegionID Int32, UserID Int64, CounterClass Int16, OS Int16, UserAgent Int16, URL String, Referer String, IsRefresh Int16, RefererCategoryID Int16, RefererRegionID Int32, URLCategoryID Int16, URLRegionID Int32, ResolutionWidth Int16, ResolutionHeight Int16, ResolutionDepth Int16, FlashMajor Int16, FlashMinor Int16, FlashMinor2 String, NetMajor Int16, NetMinor Int16, UserAgentMajor Int16, UserAgentMinor String, CookieEnable Int16, JavascriptEnable Int16, IsMobile Int16, MobilePhone Int16, MobilePhoneModel String, Params String, IPNetworkID Int32, TraficSourceID Int16, SearchEngineID Int16, SearchPhrase String, AdvEngineID Int16, IsArtifical Int16, WindowClientWidth Int16, WindowClientHeight Int16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 Int16, SilverlightVersion2 Int16, SilverlightVersion3 Int32, SilverlightVersion4 Int16, PageCharset String, CodeVersion Int32, IsLink Int16, IsDownload Int16, IsNotBounce Int16, FUniqID Int64, OriginalURL String, HID Int32, IsOldCounter Int16, IsEvent Int16, IsParameter Int16, DontCountHits Int16, WithHash Int16, HitColor String, LocalEventTime DateTime, Age Int16, Sex Int16, Income Int16, Interests Int16, Robotness Int16, RemoteIP Int32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage String, BrowserCountry String, SocialNetwork String, SocialAction String, HTTPError Int16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, SocialSourceNetworkID Int16, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency String, ParamCurrencyID Int16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID Int16, RefererHash Int64, URLHash Int64, CLID Int32" >> $format; + clickhouse-client --query "SELECT * FROM hits LIMIT 10 FORMAT $format" >> $format || rm $format; done - */ - /** And: + */ - for format in $(clickhouse-client --query "SELECT name FROM system.formats WHERE is_output"); do - echo $format; - echo $format >> $format; - echo "WatchID Int64, JavaEnable Int16, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID Int32, ClientIP Int32, RegionID Int32, UserID Int64, CounterClass Int16, OS Int16, UserAgent Int16, URL String, Referer String, IsRefresh Int16, RefererCategoryID Int16, RefererRegionID Int32, URLCategoryID Int16, URLRegionID Int32, ResolutionWidth Int16, ResolutionHeight Int16, ResolutionDepth Int16, FlashMajor Int16, FlashMinor Int16, FlashMinor2 String, NetMajor Int16, NetMinor Int16, UserAgentMajor Int16, UserAgentMinor String, CookieEnable Int16, JavascriptEnable 
Int16, IsMobile Int16, MobilePhone Int16, MobilePhoneModel String, Params String, IPNetworkID Int32, TraficSourceID Int16, SearchEngineID Int16, SearchPhrase String, AdvEngineID Int16, IsArtifical Int16, WindowClientWidth Int16, WindowClientHeight Int16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 Int16, SilverlightVersion2 Int16, SilverlightVersion3 Int32, SilverlightVersion4 Int16, PageCharset String, CodeVersion Int32, IsLink Int16, IsDownload Int16, IsNotBounce Int16, FUniqID Int64, OriginalURL String, HID Int32, IsOldCounter Int16, IsEvent Int16, IsParameter Int16, DontCountHits Int16, WithHash Int16, HitColor String, LocalEventTime DateTime, Age Int16, Sex Int16, Income Int16, Interests Int16, Robotness Int16, RemoteIP Int32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage String, BrowserCountry String, SocialNetwork String, SocialAction String, HTTPError Int16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, SocialSourceNetworkID Int16, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency String, ParamCurrencyID Int16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID Int16, RefererHash Int64, URLHash Int64, CLID Int32" >> $format; - clickhouse-client --query "SELECT * FROM hits LIMIT 10 FORMAT $format" >> $format || rm $format; - done + /// Compile the code as follows: + /// mkdir build_asan_fuzz + /// cd build_asan_fuzz + /// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 .. + /// + /// The corpus is located here: + /// https://github.com/ClickHouse/fuzz-corpus/tree/main/format_fuzzer + /// + /// The fuzzer can be run as follows: + /// ../../../build_asan_fuzz/src/Formats/fuzzers/format_fuzzer corpus -jobs=64 -rss_limit_mb=8192 - */ + DB::ReadBufferFromMemory in(data, size); - /// Compile the code as follows: - /// mkdir build_asan_fuzz - /// cd build_asan_fuzz - /// CC=clang CXX=clang++ cmake -D SANITIZE=address -D ENABLE_FUZZING=1 -D WITH_COVERAGE=1 .. 
- /// - /// The corpus is located here: - /// https://github.com/ClickHouse/fuzz-corpus/tree/main/format_fuzzer - /// - /// The fuzzer can be run as follows: - /// ../../../build_asan_fuzz/src/Formats/fuzzers/format_fuzzer corpus -jobs=64 -rss_limit_mb=8192 + String format; + readStringUntilNewlineInto(format, in); + assertChar('\n', in); - DB::ReadBufferFromMemory in(data, size); + String structure; + readStringUntilNewlineInto(structure, in); + assertChar('\n', in); - String format; - readStringUntilNewlineInto(format, in); - assertChar('\n', in); + ColumnsDescription description = parseColumnsListFromString(structure, context); + auto columns_info = description.getOrdinary(); - String structure; - readStringUntilNewlineInto(structure, in); - assertChar('\n', in); + Block header; + for (const auto & info : columns_info) + { + ColumnWithTypeAndName column; + column.name = info.name; + column.type = info.type; + column.column = column.type->createColumn(); + header.insert(std::move(column)); + } - ColumnsDescription description = parseColumnsListFromString(structure, context); - auto columns_info = description.getOrdinary(); + InputFormatPtr input_format = context->getInputFormat(format, in, header, 13 /* small block size */); - Block header; - for (const auto & info : columns_info) - { - ColumnWithTypeAndName column; - column.name = info.name; - column.type = info.type; - column.column = column.type->createColumn(); - header.insert(std::move(column)); + QueryPipeline pipeline(Pipe(std::move(input_format))); + PullingPipelineExecutor executor(pipeline); + Block res; + while (executor.pull(res)) + ; + } + catch (...) + { } - - InputFormatPtr input_format = context->getInputFormat(format, in, header, 13 /* small block size */); - - QueryPipeline pipeline(Pipe(std::move(input_format))); - PullingPipelineExecutor executor(pipeline); - Block res; - while (executor.pull(res)) - ; return 0; } -catch (...) 
-{ - return 1; -} diff --git a/src/Formats/insertNullAsDefaultIfNeeded.cpp b/src/Formats/insertNullAsDefaultIfNeeded.cpp index 767892718c5..c42b8c54d73 100644 --- a/src/Formats/insertNullAsDefaultIfNeeded.cpp +++ b/src/Formats/insertNullAsDefaultIfNeeded.cpp @@ -1,16 +1,96 @@ #include #include +#include +#include +#include #include #include +#include +#include +#include #include +#include namespace DB { -void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values) +bool insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values) { + if (isArray(input_column.type) && isArray(header_column.type)) + { + ColumnWithTypeAndName nested_input_column; + const auto * array_input_column = checkAndGetColumn(input_column.column.get()); + nested_input_column.column = array_input_column->getDataPtr(); + nested_input_column.type = checkAndGetDataType(input_column.type.get())->getNestedType(); + + ColumnWithTypeAndName nested_header_column; + nested_header_column.column = checkAndGetColumn(header_column.column.get())->getDataPtr(); + nested_header_column.type = checkAndGetDataType(header_column.type.get())->getNestedType(); + + if (!insertNullAsDefaultIfNeeded(nested_input_column, nested_header_column, 0, nullptr)) + return false; + + input_column.column = ColumnArray::create(nested_input_column.column, array_input_column->getOffsetsPtr()); + input_column.type = std::make_shared(std::move(nested_input_column.type)); + return true; + } + + if (isTuple(input_column.type) && isTuple(header_column.type)) + { + const auto * tuple_input_column = checkAndGetColumn(input_column.column.get()); + const auto * tuple_input_type = checkAndGetDataType(input_column.type.get()); + const auto * tuple_header_column = checkAndGetColumn(header_column.column.get()); + const auto * tuple_header_type = checkAndGetDataType(header_column.type.get()); + + if (tuple_input_type->getElements().size() != tuple_header_type->getElements().size()) + return false; + + Columns nested_input_columns; + nested_input_columns.reserve(tuple_input_type->getElements().size()); + DataTypes nested_input_types; + nested_input_types.reserve(tuple_input_type->getElements().size()); + bool changed = false; + for (size_t i = 0; i != tuple_input_type->getElements().size(); ++i) + { + ColumnWithTypeAndName nested_input_column; + nested_input_column.column = tuple_input_column->getColumnPtr(i); + nested_input_column.type = tuple_input_type->getElement(i); + ColumnWithTypeAndName nested_header_column; + nested_header_column.column = tuple_header_column->getColumnPtr(i); + nested_header_column.type = tuple_header_type->getElement(i); + changed |= insertNullAsDefaultIfNeeded(nested_input_column, nested_header_column, 0, nullptr); + nested_input_columns.push_back(std::move(nested_input_column.column)); + nested_input_types.push_back(std::move(nested_input_column.type)); + } + + if (!changed) + return false; + + input_column.column = ColumnTuple::create(std::move(nested_input_columns)); + input_column.type = std::make_shared(std::move(nested_input_types)); + return true; + } + + if (isMap(input_column.type) && isMap(header_column.type)) + { + ColumnWithTypeAndName nested_input_column; + nested_input_column.column = checkAndGetColumn(input_column.column.get())->getNestedColumnPtr(); + nested_input_column.type = 
checkAndGetDataType(input_column.type.get())->getNestedType(); + + ColumnWithTypeAndName nested_header_column; + nested_header_column.column = checkAndGetColumn(header_column.column.get())->getNestedColumnPtr(); + nested_header_column.type = checkAndGetDataType(header_column.type.get())->getNestedType(); + + if (!insertNullAsDefaultIfNeeded(nested_input_column, nested_header_column, 0, nullptr)) + return false; + + input_column.column = ColumnMap::create(std::move(nested_input_column.column)); + input_column.type = std::make_shared(std::move(nested_input_column.type)); + return true; + } + if (!isNullableOrLowCardinalityNullable(input_column.type) || isNullableOrLowCardinalityNullable(header_column.type)) - return; + return false; if (block_missing_values) { @@ -32,6 +112,8 @@ void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const Col const auto * lc_type = assert_cast(input_column.type.get()); input_column.type = std::make_shared(removeNullable(lc_type->getDictionaryType())); } + + return true; } } diff --git a/src/Formats/insertNullAsDefaultIfNeeded.h b/src/Formats/insertNullAsDefaultIfNeeded.h index 3e4dcd1e74a..874f803a14c 100644 --- a/src/Formats/insertNullAsDefaultIfNeeded.h +++ b/src/Formats/insertNullAsDefaultIfNeeded.h @@ -5,6 +5,6 @@ namespace DB { -void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values); +bool insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values); } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 29ef46f330f..580db61edde 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -101,6 +101,7 @@ void registerInputFormatJSONAsObject(FormatFactory & factory); void registerInputFormatLineAsString(FormatFactory & factory); void registerInputFormatMySQLDump(FormatFactory & factory); void registerInputFormatParquetMetadata(FormatFactory & factory); +void registerInputFormatOne(FormatFactory & factory); #if USE_HIVE void registerInputFormatHiveText(FormatFactory & factory); @@ -142,6 +143,7 @@ void registerTemplateSchemaReader(FormatFactory & factory); void registerMySQLSchemaReader(FormatFactory & factory); void registerBSONEachRowSchemaReader(FormatFactory & factory); void registerParquetMetadataSchemaReader(FormatFactory & factory); +void registerOneSchemaReader(FormatFactory & factory); void registerFileExtensions(FormatFactory & factory); @@ -243,6 +245,7 @@ void registerFormats() registerInputFormatMySQLDump(factory); registerInputFormatParquetMetadata(factory); + registerInputFormatOne(factory); registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory); registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory); @@ -279,6 +282,7 @@ void registerFormats() registerMySQLSchemaReader(factory); registerBSONEachRowSchemaReader(factory); registerParquetMetadataSchemaReader(factory); + registerOneSchemaReader(factory); } } diff --git a/src/Formats/registerWithNamesAndTypes.cpp b/src/Formats/registerWithNamesAndTypes.cpp index 2dee107844d..674865a3bed 100644 --- a/src/Formats/registerWithNamesAndTypes.cpp +++ b/src/Formats/registerWithNamesAndTypes.cpp @@ -12,8 +12,9 @@ void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWit void markFormatWithNamesAndTypesSupportsSamplingColumns(const std::string & base_format_name, 
FormatFactory & factory) { - factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNames"); - factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNamesAndTypes"); + auto setting_checker = [](const FormatSettings & settings){ return settings.with_names_use_header; }; + factory.registerSubsetOfColumnsSupportChecker(base_format_name + "WithNames", setting_checker); + factory.registerSubsetOfColumnsSupportChecker(base_format_name + "WithNamesAndTypes", setting_checker); } } diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 06436488050..48008827f48 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -3,10 +3,22 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) -list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp extractTimeZoneFromFunctionArguments.cpp FunctionsLogical.cpp) -list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h extractTimeZoneFromFunctionArguments.h FunctionsLogical.h) +extract_into_parent_list(clickhouse_functions_sources dbms_sources + IFunction.cpp + FunctionFactory.cpp + FunctionHelpers.cpp + extractTimeZoneFromFunctionArguments.cpp + FunctionsLogical.cpp +) +extract_into_parent_list(clickhouse_functions_headers dbms_headers + IFunction.h + FunctionFactory.h + FunctionHelpers.h + extractTimeZoneFromFunctionArguments.h + FunctionsLogical.h +) -add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_sources}) +add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) list (APPEND OBJECT_LIBS $) @@ -112,6 +124,27 @@ if (ENABLE_FUZZING) add_compile_definitions(FUZZING_MODE=1) endif () +if (USE_GPERF) + # Only for regenerating + add_custom_target(generate-html-char-ref-gperf ./HTMLCharacterReference.sh + SOURCES ./HTMLCharacterReference.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + BYPRODUCTS "${CMAKE_CURRENT_SOURCE_DIR}/HTMLCharacterReference.gperf" + ) + add_custom_target(generate-html-char-ref ${GPERF} -t HTMLCharacterReference.gperf --output-file=HTMLCharacterReference.generated.cpp + && clang-format -i HTMLCharacterReference.generated.cpp + # for clang-tidy, since string.h is deprecated + && sed -i 's/\#include /\#include /g' HTMLCharacterReference.generated.cpp + SOURCES HTMLCharacterReference.gperf + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + add_dependencies(generate-html-char-ref generate-html-char-ref-gperf) + if (NOT TARGET generate-source) + add_custom_target(generate-source) + endif () + add_dependencies(generate-source generate-html-char-ref) +endif () + target_link_libraries(clickhouse_functions_obj PUBLIC ${PUBLIC_LIBS} PRIVATE ${PRIVATE_LIBS}) # Used to forward the linking information to the final binaries such as clickhouse / unit_tests_dbms, diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a1c880f6956..a351d7fdf30 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -49,37 +49,37 @@ namespace ErrorCodes /// This factor transformation will say that the function is monotone everywhere. 
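+/// ZeroTransform maps every argument to the same constant (0), so a transform that uses it
+/// as its FactorTransform is treated as monotonic over the whole input range.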
struct ZeroTransform { - static inline UInt16 execute(Int64, const DateLUTImpl &) { return 0; } - static inline UInt16 execute(UInt32, const DateLUTImpl &) { return 0; } - static inline UInt16 execute(Int32, const DateLUTImpl &) { return 0; } - static inline UInt16 execute(UInt16, const DateLUTImpl &) { return 0; } + static UInt16 execute(Int64, const DateLUTImpl &) { return 0; } + static UInt16 execute(UInt32, const DateLUTImpl &) { return 0; } + static UInt16 execute(Int32, const DateLUTImpl &) { return 0; } + static UInt16 execute(UInt16, const DateLUTImpl &) { return 0; } }; struct ToDateImpl { static constexpr auto name = "toDate"; - static inline UInt16 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt16 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toDayNum(t.whole)); } - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return UInt16(time_zone.toDayNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return UInt16(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32, const DateLUTImpl &) + static UInt16 execute(Int32, const DateLUTImpl &) { throwDateIsNotSupported(name); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl &) + static UInt16 execute(UInt16 d, const DateLUTImpl &) { return d; } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toDayNum(t.whole), 0}; } @@ -91,20 +91,20 @@ struct ToDate32Impl { static constexpr auto name = "toDate32"; - static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) + static Int32 execute(Int64 t, const DateLUTImpl & time_zone) { return Int32(time_zone.toDayNum(t)); } - static inline Int32 execute(UInt32 t, const DateLUTImpl & time_zone) + static Int32 execute(UInt32 t, const DateLUTImpl & time_zone) { /// Don't saturate. 
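+        /// (Date32 covers a wider range than Date, so the day number is intentionally
+        /// not clamped to Date's range here.)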
return Int32(time_zone.toDayNum(t)); } - static inline Int32 execute(Int32 d, const DateLUTImpl &) + static Int32 execute(Int32 d, const DateLUTImpl &) { return d; } - static inline Int32 execute(UInt16 d, const DateLUTImpl &) + static Int32 execute(UInt16 d, const DateLUTImpl &) { return d; } @@ -116,27 +116,27 @@ struct ToStartOfDayImpl { static constexpr auto name = "toStartOfDay"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toDate(static_cast(t.whole))); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toDate(t)); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) { return static_cast(time_zone.toDate(ExtendedDayNum(d))); } - static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return static_cast(time_zone.toDate(DayNum(d))); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toDate(t.whole), 0}; } - static inline Int64 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)) * DecimalUtils::scaleMultiplier(DataTypeDateTime64::default_scale); } @@ -148,29 +148,29 @@ struct ToMondayImpl { static constexpr auto name = "toMonday"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { //return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t)); return time_zone.toFirstDayNumOfWeek(t); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { //return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t)); return time_zone.toFirstDayNumOfWeek(t); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(DayNum(d)); } - static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t)); } - static inline Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) + static Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d)); } @@ -181,27 +181,27 @@ struct ToStartOfMonthImpl { static constexpr auto name = "toStartOfMonth"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t)); } - static inline UInt16 
execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(DayNum(d)); } - static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t)); } - static inline Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) + static Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d)); } @@ -213,27 +213,27 @@ struct ToLastDayOfMonthImpl { static constexpr auto name = "toLastDayOfMonth"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toLastDayNumOfMonth(time_zone.toDayNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toLastDayNumOfMonth(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toLastDayNumOfMonth(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toLastDayNumOfMonth(DayNum(d)); } - static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toLastDayNumOfMonth(time_zone.toDayNum(t)); } - static inline Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) + static Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toLastDayNumOfMonth(ExtendedDayNum(d)); } @@ -244,27 +244,27 @@ struct ToStartOfQuarterImpl { static constexpr auto name = "toStartOfQuarter"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(time_zone.toDayNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(DayNum(d)); } - static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(time_zone.toDayNum(t)); } - static inline Int32 executeExtendedResult(Int32 d, const 
DateLUTImpl & time_zone) + static Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d)); } @@ -275,27 +275,27 @@ struct ToStartOfYearImpl { static constexpr auto name = "toStartOfYear"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(time_zone.toDayNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(DayNum(d)); } - static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(time_zone.toDayNum(t)); } - static inline Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) + static Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d)); } @@ -313,19 +313,19 @@ struct ToTimeImpl { return static_cast(time_zone.toTime(t.whole) + 86400); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toTime(t) + 86400); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -334,27 +334,27 @@ struct ToStartOfMinuteImpl { static constexpr auto name = "toStartOfMinute"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toStartOfMinute(t.whole)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toStartOfMinute(t); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toStartOfMinute(t.whole), 
0}; } - static inline Int64 executeExtendedResult(Int32, const DateLUTImpl &) + static Int64 executeExtendedResult(Int32, const DateLUTImpl &) { throwDate32IsNotSupported(name); } @@ -369,7 +369,7 @@ struct ToStartOfSecondImpl { static constexpr auto name = "toStartOfSecond"; - static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + static DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) { auto fractional_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier); @@ -385,19 +385,19 @@ struct ToStartOfSecondImpl return datetime64 - fractional_with_sign; } - static inline UInt32 execute(UInt32, const DateLUTImpl &) + static UInt32 execute(UInt32, const DateLUTImpl &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -406,7 +406,7 @@ struct ToStartOfMillisecondImpl { static constexpr auto name = "toStartOfMillisecond"; - static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + static DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) { // given that scale is 6, scale_multiplier is 1000000 // for DateTime64 value of 123.456789: @@ -433,19 +433,19 @@ struct ToStartOfMillisecondImpl } } - static inline UInt32 execute(UInt32, const DateLUTImpl &) + static UInt32 execute(UInt32, const DateLUTImpl &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -454,7 +454,7 @@ struct ToStartOfMicrosecondImpl { static constexpr auto name = "toStartOfMicrosecond"; - static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + static DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) { // @see ToStartOfMillisecondImpl @@ -477,19 +477,19 @@ struct ToStartOfMicrosecondImpl } } - static inline UInt32 execute(UInt32, const DateLUTImpl &) + static UInt32 execute(UInt32, const DateLUTImpl &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 
execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -498,7 +498,7 @@ struct ToStartOfNanosecondImpl { static constexpr auto name = "toStartOfNanosecond"; - static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + static DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) { // @see ToStartOfMillisecondImpl if (scale_multiplier == 1000000000) @@ -515,19 +515,19 @@ struct ToStartOfNanosecondImpl } } - static inline UInt32 execute(UInt32, const DateLUTImpl &) + static UInt32 execute(UInt32, const DateLUTImpl &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -536,27 +536,27 @@ struct ToStartOfFiveMinutesImpl { static constexpr auto name = "toStartOfFiveMinutes"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toStartOfFiveMinutes(t.whole)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toStartOfFiveMinutes(t); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toStartOfFiveMinutes(t.whole), 0}; } - static inline Int64 executeExtendedResult(Int32, const DateLUTImpl &) + static Int64 executeExtendedResult(Int32, const DateLUTImpl &) { throwDate32IsNotSupported(name); } @@ -568,27 +568,27 @@ struct ToStartOfTenMinutesImpl { static constexpr auto name = "toStartOfTenMinutes"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toStartOfTenMinutes(t.whole)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toStartOfTenMinutes(t); } - static inline 
UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toStartOfTenMinutes(t.whole), 0}; } - static inline Int64 executeExtendedResult(Int32, const DateLUTImpl &) + static Int64 executeExtendedResult(Int32, const DateLUTImpl &) { throwDate32IsNotSupported(name); } @@ -600,27 +600,27 @@ struct ToStartOfFifteenMinutesImpl { static constexpr auto name = "toStartOfFifteenMinutes"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toStartOfFifteenMinutes(t.whole)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toStartOfFifteenMinutes(t); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toStartOfFifteenMinutes(t.whole), 0}; } - static inline Int64 executeExtendedResult(Int32, const DateLUTImpl &) + static Int64 executeExtendedResult(Int32, const DateLUTImpl &) { throwDate32IsNotSupported(name); } @@ -633,34 +633,34 @@ struct TimeSlotImpl { static constexpr auto name = "timeSlot"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl &) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl &) { return static_cast(t.whole / 1800 * 1800); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl &) + static UInt32 execute(UInt32 t, const DateLUTImpl &) { return t / 1800 * 1800; } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl &) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl &) { if (likely(t.whole >= 0)) return {t.whole / 1800 * 1800, 0}; return {(t.whole + 1 - 1800) / 1800 * 1800, 0}; } - static inline Int64 
executeExtendedResult(Int32, const DateLUTImpl &) + static Int64 executeExtendedResult(Int32, const DateLUTImpl &) { throwDate32IsNotSupported(name); } @@ -672,32 +672,32 @@ struct ToStartOfHourImpl { static constexpr auto name = "toStartOfHour"; - static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toStartOfHour(t.whole)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toStartOfHour(t); } - static inline UInt32 execute(Int32, const DateLUTImpl &) + static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static UInt32 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) + static DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { return {time_zone.toStartOfHour(t.whole), 0}; } - static inline Int64 executeExtendedResult(Int32, const DateLUTImpl &) + static Int64 executeExtendedResult(Int32, const DateLUTImpl &) { throwDate32IsNotSupported(name); } @@ -709,26 +709,26 @@ struct ToYearImpl { static constexpr auto name = "toYear"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toYear(t); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toYear(t); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toYear(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return true; } + static constexpr bool hasPreimage() { return true; } - static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) + static RangeOrNull getPreimage(const IDataType & type, const Field & point) { if (point.getType() != Field::Types::UInt64) return std::nullopt; @@ -757,19 +757,19 @@ struct ToWeekYearImpl static constexpr Int8 week_mode = 3; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toYearWeek(t, week_mode).first; } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toYearWeek(t, week_mode).first; } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toYearWeek(ExtendedDayNum(d), week_mode).first; } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toYearWeek(DayNum(d), week_mode).first; } @@ 
-781,19 +781,19 @@ struct ToWeekOfWeekYearImpl { static constexpr auto name = "toWeekOfWeekYear"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(t); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(t); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(DayNum(d)); } @@ -805,23 +805,23 @@ struct ToQuarterImpl { static constexpr auto name = "toQuarter"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toQuarter(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toQuarter(t); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toQuarter(ExtendedDayNum(d)); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toQuarter(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -830,23 +830,23 @@ struct ToMonthImpl { static constexpr auto name = "toMonth"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toMonth(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toMonth(t); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toMonth(ExtendedDayNum(d)); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toMonth(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -855,24 +855,24 @@ struct ToDayOfMonthImpl { static constexpr auto name = "toDayOfMonth"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfMonth(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfMonth(t); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfMonth(ExtendedDayNum(d)); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfMonth(DayNum(d)); } - static inline constexpr bool hasPreimage() { 
return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMonthImpl; }; @@ -880,19 +880,19 @@ struct ToDayOfWeekImpl { static constexpr auto name = "toDayOfWeek"; - static inline UInt8 execute(Int64 t, UInt8 mode, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, UInt8 mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t, mode); } - static inline UInt8 execute(UInt32 t, UInt8 mode, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, UInt8 mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t, mode); } - static inline UInt8 execute(Int32 d, UInt8 mode, const DateLUTImpl & time_zone) + static UInt8 execute(Int32 d, UInt8 mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(ExtendedDayNum(d), mode); } - static inline UInt8 execute(UInt16 d, UInt8 mode, const DateLUTImpl & time_zone) + static UInt8 execute(UInt16 d, UInt8 mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(DayNum(d), mode); } @@ -904,48 +904,79 @@ struct ToDayOfYearImpl { static constexpr auto name = "toDayOfYear"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfYear(t); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfYear(t); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfYear(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; +struct ToDaysSinceYearZeroImpl +{ +private: + /// Constant calculated from MySQL's TO_DAYS() implementation. 
+ /// https://github.com/mysql/mysql-server/blob/ea1efa9822d81044b726aab20c857d5e1b7e046a/mysys/my_time.cc#L1042 + static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1900 = 693'961; /// 01 January, each + +public: + static constexpr auto name = "toDaysSinceYearZero"; + + static UInt32 execute(Int64, const DateLUTImpl &) + { + throwDateTimeIsNotSupported(name); + } + static UInt32 execute(UInt32, const DateLUTImpl &) + { + throwDateTimeIsNotSupported(name); + } + static UInt32 execute(Int32 d, const DateLUTImpl &) + { + return DAYS_BETWEEN_YEARS_0_AND_1900 + d; + } + static UInt32 execute(UInt16 d, const DateLUTImpl &) + { + return DAYS_BETWEEN_YEARS_0_AND_1900 + d; + } + static constexpr bool hasPreimage() { return false; } + + using FactorTransform = ZeroTransform; +}; + struct ToHourImpl { static constexpr auto name = "toHour"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toHour(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toHour(t); } - static inline UInt8 execute(Int32, const DateLUTImpl &) + static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt8 execute(UInt16, const DateLUTImpl &) + static UInt8 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -954,27 +985,27 @@ struct TimezoneOffsetImpl { static constexpr auto name = "timezoneOffset"; - static inline time_t execute(Int64 t, const DateLUTImpl & time_zone) + static time_t execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.timezoneOffset(t); } - static inline time_t execute(UInt32 t, const DateLUTImpl & time_zone) + static time_t execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.timezoneOffset(t); } - static inline time_t execute(Int32, const DateLUTImpl &) + static time_t execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline time_t execute(UInt16, const DateLUTImpl &) + static time_t execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToTimeImpl; }; @@ -982,23 +1013,23 @@ struct ToMinuteImpl { static constexpr auto name = "toMinute"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toMinute(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toMinute(t); } - static inline UInt8 execute(Int32, const DateLUTImpl &) + static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt8 execute(UInt16, const DateLUTImpl &) + static UInt8 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool 
hasPreimage() { return false; } using FactorTransform = ToStartOfHourImpl; }; @@ -1007,23 +1038,23 @@ struct ToSecondImpl { static constexpr auto name = "toSecond"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toSecond(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toSecond(t); } - static inline UInt8 execute(Int32, const DateLUTImpl &) + static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline UInt8 execute(UInt16, const DateLUTImpl &) + static UInt8 execute(UInt16, const DateLUTImpl &) { - throwDateTimeIsNotSupported(name); + throwDateIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMinuteImpl; }; @@ -1032,23 +1063,23 @@ struct ToISOYearImpl { static constexpr auto name = "toISOYear"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toISOYear(time_zone.toDayNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toISOYear(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toISOYear(ExtendedDayNum(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toISOYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1057,27 +1088,27 @@ struct ToStartOfISOYearImpl { static constexpr auto name = "toStartOfISOYear"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return t < 0 ? 0 : time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(std::min(Int32(time_zone.toDayNum(t)), Int32(DATE_LUT_MAX_DAY_NUM)))); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfISOYear(time_zone.toDayNum(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { return d < 0 ? 
0 : time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(std::min(d, Int32(DATE_LUT_MAX_DAY_NUM)))); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfISOYear(DayNum(d)); } - static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) + static Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfISOYear(time_zone.toDayNum(t)); } - static inline Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) + static Int32 executeExtendedResult(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(d)); } @@ -1089,23 +1120,23 @@ struct ToISOWeekImpl { static constexpr auto name = "toISOWeek"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(time_zone.toDayNum(t)); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(time_zone.toDayNum(t)); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(ExtendedDayNum(d)); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ToISOYearImpl; }; @@ -1126,29 +1157,29 @@ struct ToRelativeYearNumImpl { static constexpr auto name = "toRelativeYearNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toYear(t)); else return static_cast(time_zone.toYear(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toYear(static_cast(t)); } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toYear(ExtendedDayNum(d))); else return static_cast(time_zone.toYear(ExtendedDayNum(d))); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1158,29 +1189,29 @@ struct ToRelativeQuarterNumImpl { static constexpr auto name = "toRelativeQuarterNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeQuarterNum(t)); else return static_cast(time_zone.toRelativeQuarterNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return 
time_zone.toRelativeQuarterNum(static_cast(t)); } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeQuarterNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeQuarterNum(ExtendedDayNum(d))); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeQuarterNum(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1190,29 +1221,29 @@ struct ToRelativeMonthNumImpl { static constexpr auto name = "toRelativeMonthNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMonthNum(t)); else return static_cast(time_zone.toRelativeMonthNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toRelativeMonthNum(static_cast(t)); } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMonthNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeMonthNum(ExtendedDayNum(d))); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeMonthNum(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1222,29 +1253,29 @@ struct ToRelativeWeekNumImpl { static constexpr auto name = "toRelativeWeekNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeWeekNum(t)); else return static_cast(time_zone.toRelativeWeekNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toRelativeWeekNum(static_cast(t)); } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeWeekNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeWeekNum(ExtendedDayNum(d))); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeWeekNum(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1254,29 +1285,29 @@ struct ToRelativeDayNumImpl { static constexpr auto name = "toRelativeDayNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr 
(precision_ == ResultPrecision::Extended) return static_cast(time_zone.toDayNum(t)); else return static_cast(time_zone.toDayNum(t)); } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayNum(static_cast(t)); } - static inline auto execute(Int32 d, const DateLUTImpl &) + static auto execute(Int32 d, const DateLUTImpl &) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(static_cast(d)); else return static_cast(static_cast(d)); } - static inline UInt16 execute(UInt16 d, const DateLUTImpl &) + static UInt16 execute(UInt16 d, const DateLUTImpl &) { return static_cast(d); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1286,35 +1317,35 @@ struct ToRelativeHourNumImpl { static constexpr auto name = "toRelativeHourNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toStableRelativeHourNum(t)); else return static_cast(time_zone.toRelativeHourNum(t)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toStableRelativeHourNum(static_cast(t))); else return static_cast(time_zone.toRelativeHourNum(static_cast(t))); } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toStableRelativeHourNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeHourNum(ExtendedDayNum(d))); } - static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toStableRelativeHourNum(DayNum(d))); else return static_cast(time_zone.toRelativeHourNum(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1324,29 +1355,29 @@ struct ToRelativeMinuteNumImpl { static constexpr auto name = "toRelativeMinuteNum"; - static inline auto execute(Int64 t, const DateLUTImpl & time_zone) + static auto execute(Int64 t, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMinuteNum(t)); else return static_cast(time_zone.toRelativeMinuteNum(t)); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return static_cast(time_zone.toRelativeMinuteNum(static_cast(t))); } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMinuteNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeMinuteNum(ExtendedDayNum(d))); } - static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return 
static_cast(time_zone.toRelativeMinuteNum(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1356,26 +1387,26 @@ struct ToRelativeSecondNumImpl { static constexpr auto name = "toRelativeSecondNum"; - static inline Int64 execute(Int64 t, const DateLUTImpl &) + static Int64 execute(Int64 t, const DateLUTImpl &) { return t; } - static inline UInt32 execute(UInt32 t, const DateLUTImpl &) + static UInt32 execute(UInt32 t, const DateLUTImpl &) { return t; } - static inline auto execute(Int32 d, const DateLUTImpl & time_zone) + static auto execute(Int32 d, const DateLUTImpl & time_zone) { if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); else return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); } - static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return static_cast(time_zone.fromDayNum(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1385,7 +1416,7 @@ struct ToRelativeSubsecondNumImpl { static constexpr auto name = "toRelativeSubsecondNumImpl"; - static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) + static Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) { static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000); if (scale == scale_multiplier) @@ -1394,15 +1425,15 @@ struct ToRelativeSubsecondNumImpl return t.value / (scale / scale_multiplier); return t.value * (scale_multiplier / scale); } - static inline Int64 execute(UInt32 t, const DateLUTImpl &) + static Int64 execute(UInt32 t, const DateLUTImpl &) { return t * scale_multiplier; } - static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) + static Int64 execute(Int32 d, const DateLUTImpl & time_zone) { return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier; } - static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone) + static Int64 execute(UInt16 d, const DateLUTImpl & time_zone) { return static_cast(time_zone.fromDayNum(DayNum(d)) * scale_multiplier); } @@ -1414,25 +1445,25 @@ struct ToYYYYMMImpl { static constexpr auto name = "toYYYYMM"; - static inline UInt32 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMM(t); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMM(t); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMM(ExtendedDayNum(d)); } - static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMM(DayNum(d)); } - static inline constexpr bool hasPreimage() { return true; } + static constexpr bool hasPreimage() { return true; } - static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) + static RangeOrNull getPreimage(const IDataType & type, const Field & point) { if (point.getType() != Field::Types::UInt64) return std::nullopt; 
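/// Illustrative sketch, not part of the patch: the `static inline ...` -> `static ...` rewrite
/// applied throughout the transforms above changes nothing observable. A function defined inside
/// a class body is implicitly inline, and a constexpr function is implicitly inline as well, so
/// the explicit keyword was redundant. A minimal, hypothetical example of the equivalence:
struct ExampleTransform
{
    static unsigned execute(unsigned t) { return t + 1; }           /// implicitly inline (new spelling)
    static inline unsigned executeOld(unsigned t) { return t + 1; } /// identical semantics (old spelling)
    static constexpr bool hasPreimage() { return false; }           /// constexpr => implicitly inline as well
};
/// The companion throwDateTimeIsNotSupported() -> throwDateIsNotSupported() swap in the Date/Date32
/// overloads presumably only changes the error text, so it names the argument type that was actually
/// passed instead of DateTime.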
@@ -1463,23 +1494,23 @@ struct ToYYYYMMDDImpl { static constexpr auto name = "toYYYYMMDD"; - static inline UInt32 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDD(t); } - static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDD(t); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDD(ExtendedDayNum(d)); } - static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDD(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1488,23 +1519,23 @@ struct ToYYYYMMDDhhmmssImpl { static constexpr auto name = "toYYYYMMDDhhmmss"; - static inline UInt64 execute(Int64 t, const DateLUTImpl & time_zone) + static UInt64 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDDhhmmss(t); } - static inline UInt64 execute(UInt32 t, const DateLUTImpl & time_zone) + static UInt64 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDDhhmmss(t); } - static inline UInt64 execute(Int32 d, const DateLUTImpl & time_zone) + static UInt64 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(ExtendedDayNum(d))); } - static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) + static UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } + static constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1519,7 +1550,7 @@ struct ToDateTimeComponentsImpl { static constexpr auto name = "toDateTimeComponents"; - static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) + static DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); @@ -1539,15 +1570,15 @@ struct ToDateTimeComponentsImpl UInt16 microsecond = static_cast(fractional % divider); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond}; } - static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone) + static DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone) { return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast(t)), 0, 0}; } - static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone) + static DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone) { return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0}; } - static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone) + static DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone) { 
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0}; } diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index ef340a33149..f52dec0eaf7 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -8,53 +8,34 @@ # include # include # include -# include +# include # include +# include # include namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int INCORRECT_DATA; } -namespace Detail -{ - inline size_t base64Decode(const std::span src, UInt8 * dst) - { -# if defined(__aarch64__) - return tb64sdec(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst)); -# else - return _tb64d(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst)); -# endif - } -} - struct Base64Encode { static constexpr auto name = "base64Encode"; - static size_t getBufferSize(const size_t string_length, const size_t string_count) + static size_t getBufferSize(size_t string_length, size_t string_count) { return ((string_length - string_count) / 3 + string_count) * 4 + string_count; } - static size_t performCoding(const std::span src, UInt8 * dst) + static size_t perform(const std::span src, UInt8 * dst) { - /* - * Some bug in sse arm64 implementation? - * `base64Encode(repeat('a', 46))` returns wrong padding character - */ -# if defined(__aarch64__) - return tb64senc(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst)); -# else - return _tb64e(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst)); -# endif + size_t outlen = 0; + base64_encode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); + return outlen; } }; @@ -62,15 +43,17 @@ struct Base64Decode { static constexpr auto name = "base64Decode"; - static size_t getBufferSize(const size_t string_length, const size_t string_count) + static size_t getBufferSize(size_t string_length, size_t string_count) { return ((string_length - string_count) / 4 + string_count) * 3 + string_count; } - static size_t performCoding(const std::span src, UInt8 * dst) + static size_t perform(const std::span src, UInt8 * dst) { - const auto outlen = Detail::base64Decode(src, dst); - if (src.size() > 0 && !outlen) + size_t outlen = 0; + int rc = base64_decode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); + + if (rc != 1) throw Exception( ErrorCodes::INCORRECT_DATA, "Failed to {} input '{}'", @@ -85,17 +68,16 @@ struct TryBase64Decode { static constexpr auto name = "tryBase64Decode"; - static size_t getBufferSize(const size_t string_length, const size_t string_count) + static size_t getBufferSize(size_t string_length, size_t string_count) { return Base64Decode::getBufferSize(string_length, string_count); } - static size_t performCoding(const std::span src, UInt8 * dst) + static size_t perform(const std::span src, UInt8 * dst) { - if (src.empty()) - return 0; + size_t outlen = 0; + base64_decode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); - const auto outlen = Detail::base64Decode(src, dst); // during decoding character array can be partially polluted // if fail, revert back and clean if (!outlen) @@ -119,20 +101,16 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function 
{}: 1 expected.", getName()); + FunctionArgumentDescriptors mandatory_arguments{ + {"value", &isStringOrFixedString, nullptr, "String or FixedString"} + }; - if (!WhichDataType(arguments[0].type).isStringOrFixedString()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of 1st argument of function {}. Must be FixedString or String.", - arguments[0].type->getName(), - getName()); + validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & input_column = arguments[0].column; if (const auto * src_column_as_fixed_string = checkAndGetColumn(*input_column)) @@ -148,7 +126,7 @@ public: } private: - static ColumnPtr execute(const ColumnString & src_column, const size_t src_row_count) + static ColumnPtr execute(const ColumnString & src_column, size_t src_row_count) { auto dst_column = ColumnString::create(); auto & dst_chars = dst_column->getChars(); @@ -169,7 +147,7 @@ private: for (size_t row = 0; row < src_row_count; ++row) { const size_t src_length = src_offsets[row] - src_offset_prev - 1; - const auto outlen = Func::performCoding({src, src_length}, dst_pos); + const auto outlen = Func::perform({src, src_length}, dst_pos); /// Base64 library is using AVX-512 with some shuffle operations. /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. @@ -188,7 +166,7 @@ private: return dst_column; } - static ColumnPtr execute(const ColumnFixedString & src_column, const size_t src_row_count) + static ColumnPtr execute(const ColumnFixedString & src_column, size_t src_row_count) { auto dst_column = ColumnString::create(); auto & dst_chars = dst_column->getChars(); @@ -207,7 +185,7 @@ private: for (size_t row = 0; row < src_row_count; ++row) { - const auto outlen = Func::performCoding({src, src_n}, dst_pos); + const auto outlen = Func::perform({src, src_n}, dst_pos); /// Base64 library is using AVX-512 with some shuffle operations. /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. 
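/// Illustrative sketch, not part of the patch: the perform() helpers above now delegate to the
/// aklomp base64 library (assumed to be the base64.h header pulled in at the top of this file)
/// instead of Turbo-Base64. The call shape used in the patch is:
///   void base64_encode(const char * src, size_t srclen, char * out, size_t * outlen, int flags);
///   int  base64_decode(const char * src, size_t srclen, char * out, size_t * outlen, int flags);
/// where flags = 0 lets the library pick its default codec and decode returns 1 on success.
/// A minimal standalone usage example under those assumptions:
#include <base64.h>
#include <string>
#include <vector>

inline std::string base64EncodeExample(const std::string & in)
{
    std::vector<char> out(((in.size() + 2) / 3) * 4 + 1); /// worst-case encoded size
    size_t outlen = 0;
    base64_encode(in.data(), in.size(), out.data(), &outlen, 0);
    return {out.data(), outlen};
}

inline bool base64DecodeExample(const std::string & in, std::string & result)
{
    std::vector<char> out(((in.size() + 3) / 4) * 3 + 1); /// worst-case decoded size
    size_t outlen = 0;
    if (base64_decode(in.data(), in.size(), out.data(), &outlen, 0) != 1)
        return false; /// the patch maps a non-1 return to ErrorCodes::INCORRECT_DATA for base64Decode
    result.assign(out.data(), outlen);
    return true;
}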
@@ -225,6 +203,7 @@ private: return dst_column; } }; + } #endif diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index c699da4eaf6..d724c35c383 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -42,6 +42,15 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -62,6 +71,7 @@ namespace ErrorCodes extern const int DECIMAL_OVERFLOW; extern const int CANNOT_ADD_DIFFERENT_AGGREGATE_STATES; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace traits_ @@ -102,6 +112,9 @@ template constexpr bool IsFloatingPoint = false; template <> inline constexpr bool IsFloatingPoint = true; template <> inline constexpr bool IsFloatingPoint = true; +template constexpr bool IsArray = false; +template <> inline constexpr bool IsArray = true; + template constexpr bool IsDateOrDateTime = false; template <> inline constexpr bool IsDateOrDateTime = true; template <> inline constexpr bool IsDateOrDateTime = true; @@ -742,6 +755,9 @@ class FunctionBinaryArithmetic : public IFunction static constexpr bool is_multiply = IsOperation::multiply; static constexpr bool is_division = IsOperation::division; static constexpr bool is_bit_hamming_distance = IsOperation::bit_hamming_distance; + static constexpr bool is_modulo = IsOperation::modulo; + static constexpr bool is_div_int = IsOperation::div_int; + static constexpr bool is_div_int_or_zero = IsOperation::div_int_or_zero; ContextPtr context; bool check_decimal_overflow = true; @@ -951,13 +967,28 @@ class FunctionBinaryArithmetic : public IFunction "argument of numeric type cannot be first", name); std::string function_name; - if (is_multiply) + if constexpr (is_multiply) { function_name = "tupleMultiplyByNumber"; } - else + else // is_division { - function_name = "tupleDivideByNumber"; + if constexpr (is_modulo) + { + function_name = "tupleModuloByNumber"; + } + else if constexpr (is_div_int) + { + function_name = "tupleIntDivByNumber"; + } + else if constexpr (is_div_int_or_zero) + { + function_name = "tupleIntDivOrZeroByNumber"; + } + else + { + function_name = "tupleDivideByNumber"; + } } return FunctionFactory::instance().get(function_name, context); @@ -1125,6 +1156,61 @@ class FunctionBinaryArithmetic : public IFunction return function->execute(arguments, result_type, input_rows_count); } + ColumnPtr executeArrayImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + const auto * return_type_array = checkAndGetDataType(result_type.get()); + + if (!return_type_array) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Return type for function {} must be array.", getName()); + + auto num_args = arguments.size(); + DataTypes data_types; + + ColumnsWithTypeAndName new_arguments {num_args}; + DataTypePtr result_array_type; + + const auto * left_const = typeid_cast(arguments[0].column.get()); + const auto * right_const = typeid_cast(arguments[1].column.get()); + + /// Unpacking arrays if both are constants. 
+ if (left_const && right_const) + { + new_arguments[0] = {left_const->getDataColumnPtr(), arguments[0].type, arguments[0].name}; + new_arguments[1] = {right_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}; + auto col = executeImpl(new_arguments, result_type, 1); + return ColumnConst::create(std::move(col), input_rows_count); + } + + /// Unpacking arrays if at least one column is constant. + if (left_const || right_const) + { + new_arguments[0] = {arguments[0].column->convertToFullColumnIfConst(), arguments[0].type, arguments[0].name}; + new_arguments[1] = {arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; + return executeImpl(new_arguments, result_type, input_rows_count); + } + + const auto * left_array_col = typeid_cast(arguments[0].column.get()); + const auto * right_array_col = typeid_cast(arguments[1].column.get()); + if (!left_array_col->hasEqualOffsets(*right_array_col)) + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments for function {} must have equal sizes", getName()); + + const auto & left_array_type = typeid_cast(arguments[0].type.get())->getNestedType(); + new_arguments[0] = {left_array_col->getDataPtr(), left_array_type, arguments[0].name}; + + const auto & right_array_type = typeid_cast(arguments[1].type.get())->getNestedType(); + new_arguments[1] = {right_array_col->getDataPtr(), right_array_type, arguments[1].name}; + + result_array_type = typeid_cast(result_type.get())->getNestedType(); + + size_t rows_count = 0; + const auto & left_offsets = left_array_col->getOffsets(); + if (!left_offsets.empty()) + rows_count = left_offsets.back(); + auto res = executeImpl(new_arguments, result_array_type, rows_count); + + return ColumnArray::create(res, typeid_cast(arguments[0].column.get())->getOffsetsPtr()); + } + ColumnPtr executeTupleNumberOperator(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const { @@ -1326,6 +1412,20 @@ public: return getReturnTypeImplStatic(new_arguments, context); } + + if constexpr (is_plus || is_minus) + { + if (isArray(arguments[0]) && isArray(arguments[1])) + { + DataTypes new_arguments { + static_cast(*arguments[0]).getNestedType(), + static_cast(*arguments[1]).getNestedType(), + }; + return std::make_shared(getReturnTypeImplStatic(new_arguments, context)); + } + } + + /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0], arguments[1], context)) { @@ -1919,25 +2019,6 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A return executeAggregateAddition(arguments, result_type, input_rows_count); } - /// Special case - one or both arguments are IPv4 - if (isIPv4(arguments[0].type) || isIPv4(arguments[1].type)) - { - ColumnsWithTypeAndName new_arguments { - { - isIPv4(arguments[0].type) ? castColumn(arguments[0], std::make_shared()) : arguments[0].column, - isIPv4(arguments[0].type) ? std::make_shared() : arguments[0].type, - arguments[0].name, - }, - { - isIPv4(arguments[1].type) ? castColumn(arguments[1], std::make_shared()) : arguments[1].column, - isIPv4(arguments[1].type) ? 
std::make_shared() : arguments[1].type, - arguments[1].name - } - }; - - return executeImpl(new_arguments, result_type, input_rows_count); - } - /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0].type, arguments[1].type, context)) { @@ -1991,6 +2072,25 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A return wrapInNullable(res, arguments, result_type, input_rows_count); } + /// Special case - one or both arguments are IPv4 + if (isIPv4(arguments[0].type) || isIPv4(arguments[1].type)) + { + ColumnsWithTypeAndName new_arguments { + { + isIPv4(arguments[0].type) ? castColumn(arguments[0], std::make_shared()) : arguments[0].column, + isIPv4(arguments[0].type) ? std::make_shared() : arguments[0].type, + arguments[0].name, + }, + { + isIPv4(arguments[1].type) ? castColumn(arguments[1], std::make_shared()) : arguments[1].column, + isIPv4(arguments[1].type) ? std::make_shared() : arguments[1].type, + arguments[1].name + } + }; + + return executeImpl2(new_arguments, result_type, input_rows_count, right_nullmap); + } + const auto * const left_generic = left_argument.type.get(); const auto * const right_generic = right_argument.type.get(); ColumnPtr res; @@ -2031,6 +2131,9 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A return (res = executeNumeric(arguments, left, right, right_nullmap)) != nullptr; }); + if (isArray(result_type)) + return executeArrayImpl(arguments, result_type, input_rows_count); + if (!valid) { // This is a logical error, because the types should have been checked diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 1546c24d30c..4444feb6129 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -14,7 +15,6 @@ #include #include #include -#include #include @@ -36,7 +36,9 @@ namespace ErrorCodes /// Corresponding types: /// - UInt16 => DataTypeDate /// - UInt32 => DataTypeDateTime +/// - Int32 => DataTypeDate32 /// - DateTime64 => DataTypeDateTime64 +/// - Int8 => error /// Please note that INPUT and OUTPUT types may differ, e.g.: /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) 
-> DateTime' @@ -45,35 +47,27 @@ struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) - { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); - return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return DateTime64(DecimalUtils::multiplyAdd(t.value, multiplier, delta)); } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(9); - return static_cast(t * multiplier + delta); + return DateTime64(DecimalUtils::multiplyAdd(static_cast(t), multiplier, delta)); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addNanoSeconds() cannot be used with Date"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date"); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addNanoSeconds() cannot be used with Date32"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date32"); } }; @@ -81,43 +75,29 @@ struct AddMicrosecondsImpl { static constexpr auto name = "addMicroseconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) - { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); - if (scale <= 6) - { - auto division = std::div((t.fractional + delta), static_cast(10e6)); - return {t.whole * multiplier + division.quot, division.rem}; - } - else - { - auto division = std::div((t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); - return {t.whole + division.quot, division.rem}; - } - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); - return scale <= 6 ? t * multiplier + delta : t + delta * multiplier; + return DateTime64(scale <= 6 + ? 
DecimalUtils::multiplyAdd(t.value, multiplier, delta) + : DecimalUtils::multiplyAdd(delta, multiplier, t.value)); } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(6); - return static_cast(t * multiplier + delta); + return DateTime64(DecimalUtils::multiplyAdd(static_cast(t), multiplier, delta)); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMicroSeconds() cannot be used with Date"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date"); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMicroSeconds() cannot be used with Date32"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date32"); } }; @@ -125,43 +105,29 @@ struct AddMillisecondsImpl { static constexpr auto name = "addMilliseconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) - { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); - if (scale <= 3) - { - auto division = std::div((t.fractional + delta), static_cast(1000)); - return {t.whole * multiplier + division.quot, division.rem}; - } - else - { - auto division = std::div((t.fractional + delta * multiplier), static_cast(1000 * multiplier)); - return {t.whole + division.quot,division.rem}; - } - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); - return scale <= 3 ? t * multiplier + delta : t + delta * multiplier; + return DateTime64(scale <= 3 + ? 
DecimalUtils::multiplyAdd(t.value, multiplier, delta) + : DecimalUtils::multiplyAdd(delta, multiplier, t.value)); } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(3); - return static_cast(t * multiplier + delta); + return DateTime64(DecimalUtils::multiplyAdd(static_cast(t), multiplier, delta)); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMilliSeconds() cannot be used with Date"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date"); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMilliSeconds() cannot be used with Date32"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date32"); } }; @@ -169,16 +135,10 @@ struct AddSecondsImpl { static constexpr auto name = "addSeconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) - { - return {t.whole + delta, t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t + delta * DecimalUtils::scaleMultiplier(scale); + return DateTime64(DecimalUtils::multiplyAdd(delta, DecimalUtils::scaleMultiplier(scale), t.value)); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -189,6 +149,7 @@ struct AddSecondsImpl static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale + static_assert(DataTypeDateTime64::default_scale == 3, ""); return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } @@ -202,12 +163,6 @@ struct AddMinutesImpl { static constexpr auto name = "addMinutes"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) - { - return {t.whole + delta * 60, t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { @@ -222,6 +177,7 @@ struct AddMinutesImpl static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale + static_assert(DataTypeDateTime64::default_scale == 3, ""); return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; } @@ -235,12 +191,6 @@ struct AddHoursImpl { static constexpr auto name = "addHours"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) - { - return {t.whole + delta * 3600, t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { @@ -255,6 +205,7 @@ 
struct AddHoursImpl static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale + static_assert(DataTypeDateTime64::default_scale == 3, ""); return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; } @@ -268,12 +219,6 @@ struct AddDaysImpl { static constexpr auto name = "addDays"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return {time_zone.addDays(t.whole, delta), t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { @@ -302,12 +247,6 @@ struct AddWeeksImpl { static constexpr auto name = "addWeeks"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return {time_zone.addWeeks(t.whole, delta), t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { @@ -336,12 +275,6 @@ struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return {time_zone.addMonths(t.whole, delta), t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { @@ -370,12 +303,6 @@ struct AddQuartersImpl { static constexpr auto name = "addQuarters"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return {time_zone.addQuarters(t.whole, delta), t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { @@ -404,12 +331,6 @@ struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return {time_zone.addYears(t.whole, delta), t.fractional}; - } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { @@ -581,11 +502,11 @@ namespace date_and_time_type_details // Compile-time mapping of value (DataType::FieldType) types to corresponding DataType template struct ResultDataTypeMap {}; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate; }; -template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate32; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime64; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime64; }; +template <> struct ResultDataTypeMap { using ResultDataType = DataTypeInt8; }; // error } template @@ -705,6 +626,10 @@ public: return std::make_shared(target_scale.value_or(DataTypeDateTime64::default_scale), std::move(timezone)); } + else if constexpr (std::is_same_v) + { + throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot be used with {}", getName(), arguments[0].type->getName()); + } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type in datetime add interval function"); } diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index deea41e6677..588cae64e16 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -20,8 +20,8 @@ using FunctionCreator = std::function; using FunctionFactoryData = std::pair; /** Creates function by name. - * Function could use for initialization (take ownership of shared_ptr, for example) - * some dictionaries from Context. + * The provided Context is guaranteed to outlive the created function. Functions may use it for + * things like settings, current database, permission checks, etc. */ class FunctionFactory : private boost::noncopyable, public IFactoryWithAliases { diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index b15bab47ae0..7671129fcfc 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -28,10 +28,24 @@ namespace ErrorCodes struct NameStartsWith { static constexpr auto name = "startsWith"; + static constexpr auto is_utf8 = false; }; struct NameEndsWith { static constexpr auto name = "endsWith"; + static constexpr auto is_utf8 = false; +}; + +struct NameStartsWithUTF8 +{ + static constexpr auto name = "startsWithUTF8"; + static constexpr auto is_utf8 = true; +}; + +struct NameEndsWithUTF8 +{ + static constexpr auto name = "endsWithUTF8"; + static constexpr auto is_utf8 = true; }; DECLARE_MULTITARGET_CODE( @@ -41,6 +55,7 @@ class FunctionStartsEndsWith : public IFunction { public: static constexpr auto name = Name::name; + static constexpr auto is_utf8 = Name::is_utf8; String getName() const override { @@ -64,7 +79,8 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (isStringOrFixedString(arguments[0]) && isStringOrFixedString(arguments[1])) + if (!is_utf8 && isStringOrFixedString(arguments[0]) && isStringOrFixedString(arguments[1]) + || isString(arguments[0]) && isString(arguments[1])) return std::make_shared(); if (isArray(arguments[0]) && isArray(arguments[1])) @@ -78,8 +94,11 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto data_type = arguments[0].type; - if (isStringOrFixedString(*data_type)) + + if (!is_utf8 && isStringOrFixedString(*data_type)) return executeImplString(arguments, {}, input_rows_count); + if (is_utf8 && isString(*data_type)) + return executeImplStringUTF8(arguments, {}, input_rows_count); if (isArray(data_type)) return executeImplArray(arguments, {}, input_rows_count); return {}; @@ -131,7 +150,6 @@ private: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(input_rows_count); - if (const ColumnString * haystack = checkAndGetColumn(haystack_column)) dispatch(StringSource(*haystack), needle_column, vec_res); else if (const ColumnFixedString * haystack_fixed = checkAndGetColumn(haystack_column)) @@ -146,6 +164,26 @@ private: return col_res; } + ColumnPtr executeImplStringUTF8(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const + { + const IColumn * haystack_column = arguments[0].column.get(); + const IColumn * needle_column = arguments[1].column.get(); + + auto col_res = ColumnVector::create(); + typename ColumnVector::Container 
& vec_res = col_res->getData(); + + vec_res.resize(input_rows_count); + if (const ColumnString * haystack = checkAndGetColumn(haystack_column)) + dispatchUTF8(UTF8StringSource(*haystack), needle_column, vec_res); + else if (const ColumnConst * haystack_const = checkAndGetColumnConst(haystack_column)) + dispatchUTF8>(ConstSource(*haystack_const), needle_column, vec_res); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal combination of columns as arguments of function {}", getName()); + + return col_res; + } + + template void dispatch(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray & res_data) const { @@ -161,6 +199,17 @@ private: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal combination of columns as arguments of function {}", getName()); } + template + void dispatchUTF8(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray & res_data) const + { + if (const ColumnString * needle = checkAndGetColumn(needle_column)) + execute(haystack_source, UTF8StringSource(*needle), res_data); + else if (const ColumnConst * needle_const = checkAndGetColumnConst(needle_column)) + execute>(haystack_source, ConstSource(*needle_const), res_data); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal combination of columns as arguments of function {}", getName()); + } + template static void execute(HaystackSource haystack_source, NeedleSource needle_source, PaddedPODArray & res_data) { @@ -172,18 +221,27 @@ private: auto needle = needle_source.getWhole(); if (needle.size > haystack.size) - { res_data[row_num] = false; - } else { - if constexpr (std::is_same_v) - { + if constexpr (std::is_same_v) /// startsWith res_data[row_num] = StringRef(haystack.data, needle.size) == StringRef(needle.data, needle.size); - } - else /// endsWith - { + else if constexpr (std::is_same_v) /// endsWith res_data[row_num] = StringRef(haystack.data + haystack.size - needle.size, needle.size) == StringRef(needle.data, needle.size); + else /// startsWithUTF8 or endsWithUTF8 + { + auto length = UTF8::countCodePoints(needle.data, needle.size); + + if constexpr (std::is_same_v) + { + auto slice = haystack_source.getSliceFromLeft(0, length); + res_data[row_num] = StringRef(slice.data, slice.size) == StringRef(needle.data, needle.size); + } + else + { + auto slice = haystack_source.getSliceFromRight(length); + res_data[row_num] = StringRef(slice.data, slice.size) == StringRef(needle.data, needle.size); + } } } diff --git a/src/Functions/FunctionToDecimalString.cpp b/src/Functions/FunctionToDecimalString.cpp new file mode 100644 index 00000000000..fe417b19137 --- /dev/null +++ b/src/Functions/FunctionToDecimalString.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +namespace DB +{ + +REGISTER_FUNCTION(ToDecimalString) +{ + factory.registerFunction( + FunctionDocumentation{ + .description=R"( +Returns string representation of a number. First argument is the number of any numeric type, +second argument is the desired number of digits in fractional part. Returns String. 
+ + )", + .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}}, + .categories{"String"} + }, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h new file mode 100644 index 00000000000..3dd946203cc --- /dev/null +++ b/src/Functions/FunctionToDecimalString.h @@ -0,0 +1,262 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; +} + +class FunctionToDecimalString : public IFunction +{ +public: + static constexpr auto name = "toDecimalString"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors mandatory_args = { + {"Value", &isNumber, nullptr, "Number"}, + {"precision", &isNativeInteger, &isColumnConst, "const Integer"} + }; + + validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + +private: + /// For operations with Integer/Float + template + void vectorConstant(const FromVectorType & vec_from, UInt8 precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated, + /// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case. + WriteBufferFromVector buf_to(vec_to); + + for (size_t i = 0; i < input_rows_count; ++i) + { + format(vec_from[i], buf_to, precision); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + template + void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + format(vec_from[i], buf_to, vec_precision[i]); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + /// For operations with Decimal + template + void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77. 
+ constexpr size_t max_digits = std::numeric_limits::digits10; + if (precision > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + + WriteBufferFromVector buf_to(vec_to); + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + writeText(vec_from[i], from_scale, buf_to, true, true, precision); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + static void format(T value, DB::WriteBuffer & out, UInt8 precision) + { + /// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values, + /// Catch this here to give user a more reasonable error. + if (precision > 60) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision)); + + DB::DoubleConverter::BufferType buffer; + double_conversion::StringBuilder builder{buffer, sizeof(buffer)}; + + const auto result = DB::DoubleConverter::instance().ToFixed(value, precision, &builder); + + if (!result) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value); + + out.write(buffer, builder.position()); + writeChar(0, out); + } + + template + static void format(T value, DB::WriteBuffer & out, UInt8 precision) + { + /// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals). 
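Both the Decimal branch above and the Integer branch below reduce to the same fixed-point rendering: take the decimal digits of the value, split off `scale` of them as the fractional part (zero for plain integers), then pad or cut that part to the requested precision. A hedged standalone sketch of that idea (it truncates rather than rounds extra digits; the real writeText overloads may treat edge cases differently):

```
#include <cstdint>
#include <string>

// Standalone sketch, not the ClickHouse writeText path: format an integer mantissa with a
// given scale (number of stored fractional digits) to exactly `precision` fractional digits.
std::string formatFixedPointSketch(int64_t mantissa, unsigned scale, unsigned precision)
{
    bool negative = mantissa < 0;
    uint64_t magnitude = negative ? 0 - static_cast<uint64_t>(mantissa) : static_cast<uint64_t>(mantissa);

    std::string digits = std::to_string(magnitude);
    if (digits.size() <= scale)                                 /// make sure an integral digit exists
        digits.insert(0, scale + 1 - digits.size(), '0');

    std::string integral = digits.substr(0, digits.size() - scale);
    std::string fractional = digits.substr(digits.size() - scale);

    if (fractional.size() > precision)
        fractional.resize(precision);                           /// cut extra digits
    else
        fractional.append(precision - fractional.size(), '0');  /// pad with trailing zeros

    std::string result = negative ? "-" + integral : integral;
    if (precision > 0)
        result += "." + fractional;
    return result;
}

/// formatFixedPointSketch(21456, 4, 2) == "2.14"; formatFixedPointSketch(-7, 0, 3) == "-7.000"
```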
+ constexpr size_t max_digits = std::numeric_limits::digits10; + if (precision > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + writeText(value, out); + if (precision > 0) [[likely]] + { + writeChar('.', out); + for (int i = 0; i < precision; ++i) + writeChar('0', out); + writeChar(0, out); + } + } + +public: + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + switch (arguments[0].type->getTypeId()) + { + case TypeIndex::UInt8: return executeType(arguments); + case TypeIndex::UInt16: return executeType(arguments); + case TypeIndex::UInt32: return executeType(arguments); + case TypeIndex::UInt64: return executeType(arguments); + case TypeIndex::UInt128: return executeType(arguments); + case TypeIndex::UInt256: return executeType(arguments); + case TypeIndex::Int8: return executeType(arguments); + case TypeIndex::Int16: return executeType(arguments); + case TypeIndex::Int32: return executeType(arguments); + case TypeIndex::Int64: return executeType(arguments); + case TypeIndex::Int128: return executeType(arguments); + case TypeIndex::Int256: return executeType(arguments); + case TypeIndex::Float32: return executeType(arguments); + case TypeIndex::Float64: return executeType(arguments); + case TypeIndex::Decimal32: return executeType(arguments); + case TypeIndex::Decimal64: return executeType(arguments); + case TypeIndex::Decimal128: return executeType(arguments); + case TypeIndex::Decimal256: return executeType(arguments); + default: + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); + } + } + +private: + template + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + { + const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); + const auto * precision_col_const = checkAndGetColumnConst>(arguments[1].column.get()); + + auto result_col = ColumnString::create(); + auto * result_col_string = assert_cast(result_col.get()); + ColumnString::Chars & result_chars = result_col_string->getChars(); + ColumnString::Offsets & result_offsets = result_col_string->getOffsets(); + + if constexpr (is_decimal) + { + const auto * from_col = checkAndGetColumn>(arguments[0].column.get()); + UInt8 from_scale = from_col->getScale(); + + if (from_col) + { + if (precision_col_const) + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); + else if (precision_col) + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); + } + else + { + const auto * from_col = checkAndGetColumn>(arguments[0].column.get()); + if (from_col) + { + if (precision_col_const) + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); + else if (precision_col) + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of 
function formatDecimal", arguments[1].column->getName()); + + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); + } + + return result_col; + } +}; + +} diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index c3a8f51ee4b..0f3f8be96a7 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -507,8 +507,8 @@ public: // use executeOnUInt instead of using executeOneString // because the latter one outputs the string in the memory order - Impl::executeOneUIntOrInt(uuid[i].toUnderType().items[0], end, false, false); - Impl::executeOneUIntOrInt(uuid[i].toUnderType().items[1], end, false, true); + Impl::executeOneUIntOrInt(UUIDHelpers::getHighBytes(uuid[i]), end, false, false); + Impl::executeOneUIntOrInt(UUIDHelpers::getLowBytes(uuid[i]), end, false, true); pos += end - begin; out_offsets[i] = pos; diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index a25da8f6c13..0a332ab70a9 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -1,9 +1,12 @@ #include + +#if USE_NLP + #include #include #include -#include + namespace DB { @@ -46,7 +49,7 @@ namespace return res; } - /// Сount how many times each bigram occurs in the text. + /// Count how many times each bigram occurs in the text. template ALWAYS_INLINE inline void calculateStats( const UInt8 * data, @@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset) } } + +#endif diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index cd40880ba54..291a287919d 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1183,15 +1183,9 @@ public: || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size()) || (arguments[0]->equals(*arguments[1])))) { - try - { - getLeastSupertype(arguments); - } - catch (const Exception &) - { + if (!tryGetLeastSupertype(arguments)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments ({}, {})" " of function {}", arguments[0]->getName(), arguments[1]->getName(), getName()); - } } if (left_tuple && right_tuple) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index b272e88d17d..cf60eea547b 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,7 @@ #include #include #include +#include "DataTypes/IDataType.h" #include @@ -203,18 +205,15 @@ struct ConvertImpl } } - if constexpr (std::is_same_v && std::is_same_v) + if constexpr (std::is_same_v && std::is_same_v) { - static_assert(std::is_same_v, "UInt128 and UUID types must be same"); - if constexpr (std::endian::native == std::endian::little) - { - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - } - else - { - vec_to[i] = vec_from[i].toUnderType(); - } + static_assert( + std::is_same_v, + "UInt128 and UUID types must be same"); + + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + continue; } @@ -886,75 +885,159 @@ struct ConvertImpl(*col_with_type_and_name.type); - - const DateLUTImpl * 
time_zone = nullptr; - - if constexpr (std::is_same_v || std::is_same_v) - time_zone = &DateLUT::instance(); - /// For argument of Date or DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v || std::is_same_v) + if constexpr (IsDataTypeDateOrDateTime) { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } + auto datetime_arg = arguments[0]; - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); + const DateLUTImpl * time_zone = nullptr; + const ColumnConst * time_zone_column = nullptr; - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); - else - data_to.resize(size * 3); /// Arbitrary - - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - - if (null_map) + if (arguments.size() == 1) { - for (size_t i = 0; i < size; ++i) + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + else /// When we have a column for timezone + { + datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); + + if constexpr (std::is_same_v || std::is_same_v) + time_zone = &DateLUT::instance(); + /// For argument of Date or DateTime type, second argument with time zone could be specified. 
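The branch below makes the time zone argument usable as an ordinary (non-constant) String column: when the second argument is not a constant, the zone name is resolved per row with DateLUT::instance(name) before the timestamp is formatted. A standalone illustration of that per-row idea, assuming a C++20 standard library with time-zone database support (std::chrono::locate_zone and zoned_time stand in for DateLUT here; this is not the ClickHouse code path):

```
#include <chrono>
#include <format>
#include <string>

// Per-row time zone formatting sketch: resolve the zone named in the second column for each
// row, then render the timestamp in that zone. locate_zone throws for unknown names, which
// loosely mirrors the "time zone must be non-empty" check in the patch.
std::string formatInZoneSketch(std::chrono::sys_seconds timestamp, const std::string & tz_name)
{
    const std::chrono::time_zone * tz = std::chrono::locate_zone(tz_name);
    return std::format("{:%F %T}", std::chrono::zoned_time{tz, timestamp});
}
```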
+ if constexpr (std::is_same_v || std::is_same_v) { - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); + if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } } } - else + const auto & col_with_type_and_name = columnGetNested(datetime_arg); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) { - for (size_t i = 0; i < size; ++i) + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); + else + data_to.resize(size * 3); /// Arbitrary + + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + const auto & type = static_cast(*col_with_type_and_name.type); + + ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); + + if (null_map) { - FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } } + else + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); } else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); + { + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + + const auto & col_with_type_and_name = 
columnGetNested(arguments[0]); + const auto & type = static_cast(*col_with_type_and_name.type); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + data_to.resize(size * 3); + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + /// We don't use timezones in this branch + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } } }; @@ -1040,13 +1123,21 @@ inline void convertFromTime(DataTypeDateTime::FieldType & x, t /** Conversion of strings to numbers, dates, datetimes: through parsing. */ template -void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) { - readText(x, rb); + if constexpr (std::is_floating_point_v) + { + if (precise_float_parsing) + readFloatTextPrecise(x, rb); + else + readFloatTextFast(x, rb); + } + else + readText(x, rb); } template <> -inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) +inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { DayNum tmp(0); readDateText(tmp, rb, *time_zone); @@ -1054,7 +1145,7 @@ inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb } template <> -inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) +inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { ExtendedDayNum tmp(0); readDateText(tmp, rb, *time_zone); @@ -1064,7 +1155,7 @@ inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer // NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. 
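The precise_float_parsing flag threaded through parseImpl/tryParseImpl chooses between a fast float reader (readFloatTextFast) and a precise one (readFloatTextPrecise). The difference only shows in the last bits of the mantissa: a fast parser that accumulates digits in floating point can drift by a few ULPs, while the precise path returns the nearest representable value. A small self-contained comparison (strtod stands in for the precise reader; the accumulator is a deliberately naive sketch, not the ClickHouse implementation):

```
#include <cstdio>
#include <cstdlib>

// Naive "fast" parser: accumulate digits in a double. Every multiply/add may round,
// so the result can differ from the correctly rounded value in the last bits.
double fastParseSketch(const char * s)
{
    double integral = 0.0, fraction = 0.0, scale = 1.0;
    for (; *s >= '0' && *s <= '9'; ++s)
        integral = integral * 10.0 + (*s - '0');
    if (*s == '.')
        for (++s; *s >= '0' && *s <= '9'; ++s)
        {
            fraction = fraction * 10.0 + (*s - '0');
            scale *= 10.0;
        }
    return integral + fraction / scale;
}

int main()
{
    const char * text = "1.2345678901234567";
    double fast = fastParseSketch(text);
    double precise = std::strtod(text, nullptr);   /// correctly rounded on conforming libcs
    std::printf("fast=%.17g precise=%.17g equal=%d\n", fast, precise, fast == precise);
}
```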
template <> -inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) +inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { time_t time = 0; readDateTimeText(time, rb, *time_zone); @@ -1072,7 +1163,7 @@ inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuf } template <> -inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) { UUID tmp; readUUIDText(tmp, rb); @@ -1080,7 +1171,7 @@ inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb } template <> -inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) { IPv4 tmp; readIPv4Text(tmp, rb); @@ -1088,7 +1179,7 @@ inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb } template <> -inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) { IPv6 tmp; readIPv6Text(tmp, rb); @@ -1096,16 +1187,21 @@ inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb } template -bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) { if constexpr (std::is_floating_point_v) - return tryReadFloatText(x, rb); + { + if (precise_float_parsing) + return tryReadFloatTextPrecise(x, rb); + else + return tryReadFloatTextFast(x, rb); + } else /*if constexpr (is_integer_v)*/ return tryReadIntText(x, rb); } template <> -inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) +inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { DayNum tmp(0); if (!tryReadDateText(tmp, rb, *time_zone)) @@ -1115,7 +1211,7 @@ inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & } template <> -inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) +inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { ExtendedDayNum tmp(0); if (!tryReadDateText(tmp, rb, *time_zone)) @@ -1125,7 +1221,7 @@ inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuff } template <> -inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) +inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { time_t tmp = 0; if (!tryReadDateTimeText(tmp, rb, *time_zone)) @@ -1135,7 +1231,7 @@ inline bool tryParseImpl(DataTypeDateTime::FieldType & x, Read } template <> -inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) { UUID tmp; if (!tryReadUUIDText(tmp, rb)) @@ -1146,7 +1242,7 @@ inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & } template <> -inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) { IPv4 tmp; if 
(!tryReadIPv4Text(tmp, rb)) @@ -1157,7 +1253,7 @@ inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & } template <> -inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) { IPv6 tmp; if (!tryReadIPv6Text(tmp, rb)) @@ -1336,6 +1432,16 @@ struct ConvertThroughParsing size_t current_offset = 0; + bool precise_float_parsing = false; + + if (DB::CurrentThread::isInitialized()) + { + const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); + + if (query_context) + precise_float_parsing = query_context->getSettingsRef().precise_float_parsing; + } + for (size_t i = 0; i < size; ++i) { size_t next_offset = std::is_same_v ? (*offsets)[i] : (current_offset + fixed_string_size); @@ -1402,7 +1508,7 @@ struct ConvertThroughParsing } } - parseImpl(vec_to[i], read_buffer, local_time_zone); + parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); } while (false); } } @@ -1472,7 +1578,7 @@ struct ConvertThroughParsing } } - parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone); + parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); } while (false); } } @@ -1834,7 +1940,7 @@ public: // toDateTime64(value, scale : Integer[, timezone: String]) || std::is_same_v) { - optional_args.push_back({"timezone", &isString, &isColumnConst, "const String"}); + optional_args.push_back({"timezone", &isString, nullptr, "String"}); } validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -1898,7 +2004,9 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + return {}; + else if constexpr (std::is_same_v) return {2}; return {1}; } @@ -3168,14 +3276,40 @@ private: { return &ConvertImplGenericFromString::execute; } - else + else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type_untyped->getName(), to_type->getName()); + if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction())) + { + return [function = to_type->getFunction()]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & /* result_type */, + const ColumnNullable * /* nullable_source */, + size_t /*input_rows_count*/) -> ColumnPtr + { + const auto & argument_column = arguments.front(); + const auto * col_agg = checkAndGetColumn(argument_column.column.get()); + if (col_agg) + { + auto new_col_agg = ColumnAggregateFunction::create(*col_agg); + new_col_agg->set(function); + return new_col_agg; + } + else + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Illegal column {} for function CAST AS AggregateFunction", + argument_column.column->getName()); + } + }; + } } + + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type_untyped->getName(), to_type->getName()); } WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const @@ -3956,7 +4090,16 @@ private: safe_convert_custom_types = 
to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); if (from_type->equals(*to_type) && safe_convert_custom_types) - return createIdentityWrapper(from_type); + { + /// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent. + if (typeid_cast(from_type.get())) + { + if (DataTypeAggregateFunction::strictEquals(from_type, to_type)) + return createIdentityWrapper(from_type); + } + else + return createIdentityWrapper(from_type); + } else if (WhichDataType(from_type).isNothing()) return createNothingWrapper(to_type.get()); diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 6af683777c3..345b9a11e0d 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -79,30 +79,56 @@ namespace impl UInt64 key1 = 0; }; - static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) + struct SipHashKeyColumns { - SipHashKey ret{}; + ColumnPtr key0; + ColumnPtr key1; + bool is_const; - const auto * tuple = checkAndGetColumn(key.column.get()); + size_t size() const + { + assert(key0 && key1); + assert(key0->size() == key1->size()); + return key0->size(); + } + SipHashKey getKey(size_t i) const + { + if (is_const) + i = 0; + const auto & key0data = assert_cast(*key0).getData(); + const auto & key1data = assert_cast(*key1).getData(); + return {key0data[i], key1data[i]}; + } + }; + + static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key) + { + const ColumnTuple * tuple = nullptr; + const auto * column = key.column.get(); + bool is_const = false; + if (isColumnConst(*column)) + { + is_const = true; + tuple = checkAndGetColumnConstData(column); + } + else + tuple = checkAndGetColumn(column); if (!tuple) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); - if (tuple->tupleSize() != 2) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); - if (tuple->empty()) - return ret; - - if (const auto * key0col = checkAndGetColumn(&(tuple->getColumn(0)))) - ret.key0 = key0col->get64(0); - else + SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const}; + assert(ret.key0); + if (!checkColumn(*ret.key0)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); - - if (const auto * key1col = checkAndGetColumn(&(tuple->getColumn(1)))) - ret.key1 = key1col->get64(0); - else + assert(ret.key1); + if (!checkColumn(*ret.key1)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); + if (ret.size() == 1) + ret.is_const = true; + return ret; } } @@ -153,15 +179,10 @@ struct IntHash64Impl template T combineHashesFunc(T t1, T t2) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - T tmp; - reverseMemcpy(&tmp, &t1, sizeof(T)); - t1 = tmp; - reverseMemcpy(&tmp, &t2, sizeof(T)); - t2 = tmp; -#endif - T hashes[] = {t1, t2}; - return HashFunction::apply(reinterpret_cast(hashes), 2 * sizeof(T)); + transformEndianness(t1); + transformEndianness(t2); + const T hashes[] {t1, t2}; + return HashFunction::apply(reinterpret_cast(hashes), sizeof(hashes)); } @@ -184,21 +205,14 @@ struct HalfMD5Impl MD5_Update(&ctx, reinterpret_cast(begin), size); MD5_Final(buf.char_data, &ctx); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return buf.uint64_data; /// No need to flip bytes on big endian machines -#else - return std::byteswap(buf.uint64_data); /// Compatibility with existing code. 
Cast need for old poco AND macos where UInt64 != uint64_t -#endif + /// Compatibility with existing code. Cast need for old poco AND macos where UInt64 != uint64_t + transformEndianness(buf.uint64_data); + return buf.uint64_data; } static UInt64 combineHashes(UInt64 h1, UInt64 h2) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - h1 = std::byteswap(h1); - h2 = std::byteswap(h2); -#endif - UInt64 hashes[] = {h1, h2}; - return apply(reinterpret_cast(hashes), 16); + return combineHashesFunc(h1, h2); } /// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions. @@ -311,15 +325,8 @@ struct SipHash64Impl static constexpr auto name = "sipHash64"; using ReturnType = UInt64; - static UInt64 apply(const char * begin, size_t size) - { - return sipHash64(begin, size); - } - - static UInt64 combineHashes(UInt64 h1, UInt64 h2) - { - return combineHashesFunc(h1, h2); - } + static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); } + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc(h1, h2); } static constexpr bool use_int_hash_for_pods = false; }; @@ -329,19 +336,19 @@ struct SipHash64KeyedImpl static constexpr auto name = "sipHash64Keyed"; using ReturnType = UInt64; using Key = impl::SipHashKey; + using KeyColumns = impl::SipHashKeyColumns; - static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } + static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } + static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - h1 = std::byteswap(h1); - h2 = std::byteswap(h2); -#endif - UInt64 hashes[] = {h1, h2}; - return applyKeyed(key, reinterpret_cast(hashes), 2 * sizeof(UInt64)); + transformEndianness(h1); + transformEndianness(h2); + const UInt64 hashes[]{h1, h2}; + return applyKeyed(key, reinterpret_cast(hashes), sizeof(hashes)); } static constexpr bool use_int_hash_for_pods = false; @@ -353,15 +360,8 @@ struct SipHash128Impl using ReturnType = UInt128; - static UInt128 combineHashes(UInt128 h1, UInt128 h2) - { - return combineHashesFunc(h1, h2); - } - - static UInt128 apply(const char * data, const size_t size) - { - return sipHash128(data, size); - } + static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } + static UInt128 apply(const char * data, const size_t size) { return sipHash128(data, size); } static constexpr bool use_int_hash_for_pods = false; }; @@ -371,11 +371,52 @@ struct SipHash128KeyedImpl static constexpr auto name = "sipHash128Keyed"; using ReturnType = UInt128; using Key = impl::SipHashKey; + using KeyColumns = impl::SipHashKeyColumns; - static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } + static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } + static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); } + static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2) + { + transformEndianness(h1); + 
transformEndianness(h2); + const UInt128 hashes[]{h1, h2}; + return applyKeyed(key, reinterpret_cast(hashes), sizeof(hashes)); + } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct SipHash128ReferenceImpl +{ + static constexpr auto name = "sipHash128Reference"; + + using ReturnType = UInt128; + + static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } + + static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct SipHash128ReferenceKeyedImpl +{ + static constexpr auto name = "sipHash128ReferenceKeyed"; + using ReturnType = UInt128; + using Key = impl::SipHashKey; + using KeyColumns = impl::SipHashKeyColumns; + + static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } + static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + + static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) + { + return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size); + } + static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -392,20 +433,6 @@ struct SipHash128KeyedImpl static constexpr bool use_int_hash_for_pods = false; }; -struct SipHash128ReferenceImpl -{ - static constexpr auto name = "sipHash128Reference"; - - using ReturnType = UInt128; - - static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } - - static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); } - - static constexpr bool use_int_hash_for_pods = false; -}; - - /** Why we need MurmurHash2? * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. * Usually there is no reason to use MurmurHash. 
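The combineHashesFunc helper and the keyed combineHashesKeyed variants above all follow one convention: normalise both halves to little-endian byte order, lay them out contiguously, and feed the resulting bytes back through the same hash. A compact sketch of that pattern (std::byteswap needs C++23; the hash itself is a placeholder parameter, not SipHash):

```
#include <bit>
#include <cstdint>
#include <cstring>

// Combine two 64-bit hashes the way the impls above do: byte-order-normalise, concatenate,
// re-hash. hash(data, size) is any byte-oriented hash; SipHash/CityHash play that role in
// the real code.
template <typename HashFn>
uint64_t combineHashesSketch(HashFn hash, uint64_t h1, uint64_t h2)
{
    if constexpr (std::endian::native == std::endian::big)
    {
        h1 = std::byteswap(h1);   /// the transformEndianness calls in the patch
        h2 = std::byteswap(h2);
    }
    unsigned char buffer[2 * sizeof(uint64_t)];
    std::memcpy(buffer, &h1, sizeof(h1));
    std::memcpy(buffer + sizeof(h1), &h2, sizeof(h2));
    return hash(buffer, sizeof(buffer));
}
```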
@@ -531,10 +558,7 @@ struct MurmurHash3Impl64 return h[0] ^ h[1]; } - static UInt64 combineHashes(UInt64 h1, UInt64 h2) - { - return IntHash64Impl::apply(h1) ^ h2; - } + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return IntHash64Impl::apply(h1) ^ h2; } static constexpr bool use_int_hash_for_pods = false; }; @@ -552,10 +576,7 @@ struct MurmurHash3Impl128 return *reinterpret_cast(bytes); } - static UInt128 combineHashes(UInt128 h1, UInt128 h2) - { - return combineHashesFunc(h1, h2); - } + static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } static constexpr bool use_int_hash_for_pods = false; }; @@ -573,18 +594,15 @@ struct JavaHashImpl static_cast(x) ^ static_cast(static_cast(x) >> 32)); } - template - || std::is_same_v - || std::is_same_v, T>::type * = nullptr> + template + requires std::same_as || std::same_as || std::same_as static ReturnType apply(T x) { return x; } - template - && !std::is_same_v - && !std::is_same_v - && !std::is_same_v, T>::type * = nullptr> + template + requires(!std::same_as && !std::same_as && !std::same_as) static ReturnType apply(T x) { if (std::is_unsigned_v) @@ -1023,7 +1041,7 @@ private: DECLARE_MULTITARGET_CODE( -template +template class FunctionAnyHash : public IFunction { public: @@ -1033,18 +1051,25 @@ private: using ToType = typename Impl::ReturnType; template - void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; + KeyType key{}; + if constexpr (Keyed) + key = Impl::getKey(key_cols, 0); if (const ColVecType * col_from = checkAndGetColumn(column)) { const typename ColVecType::Container & vec_from = col_from->getData(); - size_t size = vec_from.size(); + const size_t size = vec_from.size(); for (size_t i = 0; i < size; ++i) { ToType hash; + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); + if constexpr (Impl::use_int_hash_for_pods) { if constexpr (std::is_same_v) @@ -1058,13 +1083,8 @@ private: hash = JavaHashImpl::apply(vec_from[i]); else { - FromType value = vec_from[i]; - if constexpr (std::endian::native == std::endian::big) - { - FromType value_reversed; - reverseMemcpy(&value_reversed, &value, sizeof(value)); - value = value_reversed; - } + auto value = vec_from[i]; + transformEndianness(value); hash = apply(key, reinterpret_cast(&value), sizeof(value)); } } @@ -1077,9 +1097,17 @@ private: } else if (auto col_from_const = checkAndGetColumnConst(column)) { + if constexpr (Keyed) + { + if (!key_cols.is_const) + { + ColumnPtr full_column = col_from_const->convertToFullColumn(); + return executeIntType(key_cols, full_column.get(), vec_to); + } + } auto value = col_from_const->template getValue(); - ToType hash; + ToType hash; if constexpr (Impl::use_int_hash_for_pods) { if constexpr (std::is_same_v) @@ -1093,22 +1121,24 @@ private: hash = JavaHashImpl::apply(value); else { - if constexpr (std::endian::native == std::endian::big) - { - FromType value_reversed; - reverseMemcpy(&value_reversed, &value, sizeof(value)); - value = value_reversed; - } + transformEndianness(value); hash = apply(key, reinterpret_cast(&value), sizeof(value)); } } - size_t size = vec_to.size(); + const size_t size = vec_to.size(); if constexpr (first) vec_to.assign(size, hash); else + { for (size_t i = 0; i < size; ++i) + { + if constexpr (Keyed) + if 
(!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); + } + } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1116,9 +1146,22 @@ private: } template - void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; + KeyType key{}; + if constexpr (Keyed) + key = Impl::getKey(key_cols, 0); + + static const auto to_little_endian = [](auto & value) + { + // IPv6 addresses are parsed into four 32-bit components in big-endian ordering on both platforms, so no change is necessary. + // Reference: `parseIPv6orIPv4` in src/Common/formatIPv6.h. + if constexpr (std::endian::native == std::endian::big && std::is_same_v, IPv6>) + return; + + transformEndianness(value); + }; if (const ColVecType * col_from = checkAndGetColumn(column)) { @@ -1127,13 +1170,17 @@ private: for (size_t i = 0; i < size; ++i) { ToType hash; + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); if constexpr (std::endian::native == std::endian::little) hash = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); else { - char tmp_buffer[sizeof(vec_from[i])]; - reverseMemcpy(tmp_buffer, &vec_from[i], sizeof(vec_from[i])); - hash = apply(key, reinterpret_cast(tmp_buffer), sizeof(vec_from[i])); + auto value = vec_from[i]; + to_little_endian(value); + + hash = apply(key, reinterpret_cast(&value), sizeof(value)); } if constexpr (first) vec_to[i] = hash; @@ -1143,23 +1190,31 @@ private: } else if (auto col_from_const = checkAndGetColumnConst(column)) { - auto value = col_from_const->template getValue(); - - ToType hash; - if constexpr (std::endian::native == std::endian::little) - hash = apply(key, reinterpret_cast(&value), sizeof(value)); - else + if constexpr (Keyed) { - char tmp_buffer[sizeof(value)]; - reverseMemcpy(tmp_buffer, &value, sizeof(value)); - hash = apply(key, reinterpret_cast(tmp_buffer), sizeof(value)); + if (!key_cols.is_const) + { + ColumnPtr full_column = col_from_const->convertToFullColumn(); + return executeBigIntType(key_cols, full_column.get(), vec_to); + } } - size_t size = vec_to.size(); + auto value = col_from_const->template getValue(); + to_little_endian(value); + + const auto hash = apply(key, reinterpret_cast(&value), sizeof(value)); + const size_t size = vec_to.size(); if constexpr (first) vec_to.assign(size, hash); else + { for (size_t i = 0; i < size; ++i) + { + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); + } + } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1167,10 +1222,16 @@ private: } template - void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { + KeyType key{}; + if constexpr (Keyed) + key = Impl::getKey(key_cols, 0); for (size_t i = 0, size = column->size(); i < size; ++i) { + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); StringRef bytes = column->getDataAt(i); const ToType hash = apply(key, bytes.data, bytes.size); if 
constexpr (first) @@ -1181,8 +1242,11 @@ private: } template - void executeString(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { + KeyType key{}; + if constexpr (Keyed) + key = Impl::getKey(key_cols, 0); if (const ColumnString * col_from = checkAndGetColumn(column)) { const typename ColumnString::Chars & data = col_from->getChars(); @@ -1192,6 +1256,9 @@ private: ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); const ToType hash = apply(key, reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1); @@ -1212,6 +1279,9 @@ private: for (size_t i = 0; i < size; ++i) { + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); const ToType hash = apply(key, reinterpret_cast(&data[i * n]), n); if constexpr (first) vec_to[i] = hash; @@ -1221,6 +1291,14 @@ private: } else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column)) { + if constexpr (Keyed) + { + if (!key_cols.is_const) + { + ColumnPtr full_column = col_from_const->convertToFullColumn(); + return executeString(key_cols, full_column.get(), vec_to); + } + } String value = col_from_const->getValue(); const ToType hash = apply(key, value.data(), value.size()); const size_t size = vec_to.size(); @@ -1228,8 +1306,15 @@ private: if constexpr (first) vec_to.assign(size, hash); else + { for (size_t i = 0; i < size; ++i) + { + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); + } + } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1237,7 +1322,7 @@ private: } template - void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const { const IDataType * nested_type = typeid_cast(*type).getNestedType().get(); @@ -1249,13 +1334,19 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first); + executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); ColumnArray::Offset current_offset = 0; + KeyType key{}; + if constexpr (Keyed) + key = Impl::getKey(key_cols, 0); for (size_t i = 0; i < size; ++i) { + if constexpr (Keyed) + if (!key_cols.is_const && i != 0) + key = Impl::getKey(key_cols, i); ColumnArray::Offset next_offset = offsets[i]; ToType hash; @@ -1279,7 +1370,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. 
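The const-column branches are exactly where per-row keys change behaviour: with a constant key, a constant input can be hashed once and the result broadcast to every row, but once the key varies per row the same bytes must be re-hashed with each row's key, which is why the keyed branches fall back to convertToFullColumn. A small sketch of that distinction, using a placeholder keyed hash (not the real sipHash64Keyed signature):

```
#include <cstdint>
#include <vector>

// keyed_hash is a stand-in for a keyed hash of (key, value).
template <typename KeyedHash>
std::vector<uint64_t> hashConstantColumnSketch(
    KeyedHash keyed_hash, uint64_t constant_value,
    const std::vector<uint64_t> & row_keys, bool key_is_const)
{
    std::vector<uint64_t> result(row_keys.size());
    if (key_is_const)
        result.assign(row_keys.size(), keyed_hash(row_keys[0], constant_value));  /// hash once, broadcast
    else
        for (size_t i = 0; i < row_keys.size(); ++i)                              /// re-hash per row
            result[i] = keyed_hash(row_keys[i], constant_value);
    return result;
}
```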
ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key, type, full_column.get(), vec_to); + executeArray(key_cols, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1287,7 +1378,7 @@ private: } template - void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const + void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const { WhichDataType which(from_type); @@ -1295,40 +1386,44 @@ private: throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", icolumn->getName(), icolumn->size(), vec_to.size(), getName()); - if (which.isUInt8()) executeIntType(key, icolumn, vec_to); - else if (which.isUInt16()) executeIntType(key, icolumn, vec_to); - else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); - else if (which.isUInt64()) executeIntType(key, icolumn, vec_to); - else if (which.isUInt128()) executeBigIntType(key, icolumn, vec_to); - else if (which.isUInt256()) executeBigIntType(key, icolumn, vec_to); - else if (which.isInt8()) executeIntType(key, icolumn, vec_to); - else if (which.isInt16()) executeIntType(key, icolumn, vec_to); - else if (which.isInt32()) executeIntType(key, icolumn, vec_to); - else if (which.isInt64()) executeIntType(key, icolumn, vec_to); - else if (which.isInt128()) executeBigIntType(key, icolumn, vec_to); - else if (which.isInt256()) executeBigIntType(key, icolumn, vec_to); - else if (which.isUUID()) executeBigIntType(key, icolumn, vec_to); - else if (which.isIPv4()) executeIntType(key, icolumn, vec_to); - else if (which.isIPv6()) executeBigIntType(key, icolumn, vec_to); - else if (which.isEnum8()) executeIntType(key, icolumn, vec_to); - else if (which.isEnum16()) executeIntType(key, icolumn, vec_to); - else if (which.isDate()) executeIntType(key, icolumn, vec_to); - else if (which.isDate32()) executeIntType(key, icolumn, vec_to); - else if (which.isDateTime()) executeIntType(key, icolumn, vec_to); + if constexpr (Keyed) + if (key_cols.size() != vec_to.size() && key_cols.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName()); + + if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isUInt256()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt64()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to); + 
else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isDate()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isDate32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result - else if (which.isDecimal32()) executeBigIntType(key, icolumn, vec_to); - else if (which.isDecimal64()) executeBigIntType(key, icolumn, vec_to); - else if (which.isDecimal128()) executeBigIntType(key, icolumn, vec_to); - else if (which.isDecimal256()) executeBigIntType(key, icolumn, vec_to); - else if (which.isFloat32()) executeIntType(key, icolumn, vec_to); - else if (which.isFloat64()) executeIntType(key, icolumn, vec_to); - else if (which.isString()) executeString(key, icolumn, vec_to); - else if (which.isFixedString()) executeString(key, icolumn, vec_to); - else if (which.isArray()) executeArray(key, from_type, icolumn, vec_to); - else executeGeneric(key, icolumn, vec_to); + else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isString()) executeString(key_cols, icolumn, vec_to); + else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to); + else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to); + else executeGeneric(key_cols, icolumn, vec_to); } - void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const + void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const { /// Flattening of tuples. 
if (const ColumnTuple * tuple = typeid_cast(column)) @@ -1337,7 +1432,7 @@ private: const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) - executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); + executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { @@ -1347,24 +1442,24 @@ private: for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); - executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first); + executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first); } } else if (const auto * map = checkAndGetColumn(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } else if (const auto * const_map = checkAndGetColumnConst(column)) { - executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); + executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { if (is_first) - executeAny(key, type, column, vec_to); + executeAny(key_cols, type, column, vec_to); else - executeAny(key, type, column, vec_to); + executeAny(key_cols, type, column, vec_to); } is_first = false; @@ -1395,34 +1490,40 @@ public: { auto col_to = ColumnVector::create(input_rows_count); - typename ColumnVector::Container & vec_to = col_to->getData(); - - /// If using a "keyed" algorithm, the first argument is the key and - /// the data starts from the second argument. - /// Otherwise there is no key and all arguments are interpreted as data. - constexpr size_t first_data_argument = Keyed; - - if (arguments.size() <= first_data_argument) + if (input_rows_count != 0) { - /// Return a fixed random-looking magic number when input is empty - vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); - } + typename ColumnVector::Container & vec_to = col_to->getData(); - KeyType key{}; - if constexpr (Keyed) - if (!arguments.empty()) - key = Impl::parseKey(arguments[0]); + /// If using a "keyed" algorithm, the first argument is the key and + /// the data starts from the second argument. + /// Otherwise there is no key and all arguments are interpreted as data. + constexpr size_t first_data_argument = Keyed; - /// The function supports arbitrary number of arguments of arbitrary types. - bool is_first_argument = true; - for (size_t i = first_data_argument; i < arguments.size(); ++i) - { - const auto & col = arguments[i]; - executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument); + if (arguments.size() <= first_data_argument) + { + /// Return a fixed random-looking magic number when input is empty + vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); + } + + KeyColumnsType key_cols{}; + if constexpr (Keyed) + if (!arguments.empty()) + key_cols = Impl::parseKeyColumns(arguments[0]); + + /// The function supports arbitrary number of arguments of arbitrary types. 
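The argument loop below is the top of the whole scheme: the first data argument fills the result vector, and every further argument is folded in through the impl's combineHashes. A stripped-down sketch of that chaining over plain vectors (hash and combine are placeholders for Impl::apply and Impl::combineHashes, and all columns are assumed to have the same row count):

```
#include <cstdint>
#include <vector>

// First column initialises the per-row hashes, later columns are combined in order.
template <typename Hash, typename Combine>
std::vector<uint64_t> hashRowsSketch(
    const std::vector<std::vector<uint64_t>> & columns, Hash hash, Combine combine)
{
    if (columns.empty())
        return {};
    const size_t rows = columns.front().size();
    std::vector<uint64_t> result(rows);

    bool is_first = true;
    for (const auto & column : columns)
    {
        for (size_t i = 0; i < rows; ++i)
        {
            const uint64_t h = hash(column[i]);
            result[i] = is_first ? h : combine(result[i], h);
        }
        is_first = false;
    }
    return result;
}
```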
+ bool is_first_argument = true; + for (size_t i = first_data_argument; i < arguments.size(); ++i) + { + const auto & col = arguments[i]; + executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument); + } } if constexpr (std::is_same_v) /// backward-compatible { + if constexpr (std::endian::native == std::endian::big) + std::ranges::for_each(col_to->getData(), transformEndianness); + auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128)); const auto & data = col_to->getData(); auto & chars = col_to_fixed_string->getChars(); @@ -1453,17 +1554,19 @@ public: ) // DECLARE_MULTITARGET_CODE -template -class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash +template +class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash { public: explicit FunctionAnyHash(ContextPtr context) : selector(context) { - selector.registerImplementation>(); + selector + .registerImplementation>(); #if USE_MULTITARGET_CODE - selector.registerImplementation>(); - selector.registerImplementation>(); + selector.registerImplementation>(); + selector + .registerImplementation>(); #endif } @@ -1676,21 +1779,8 @@ struct ImplWyHash64 static constexpr auto name = "wyHash64"; using ReturnType = UInt64; - static UInt64 apply(const char * s, const size_t len) - { - return wyhash(s, len, 0, _wyp); - } - static UInt64 combineHashes(UInt64 h1, UInt64 h2) - { - union - { - UInt64 u64[2]; - char chars[16]; - }; - u64[0] = h1; - u64[1] = h2; - return apply(chars, 16); - } + static UInt64 apply(const char * s, const size_t len) { return wyhash(s, len, 0, _wyp); } + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc(h1, h2); } static constexpr bool use_int_hash_for_pods = false; }; @@ -1699,7 +1789,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; }; using FunctionSipHash64 = FunctionAnyHash; -using FunctionSipHash64Keyed = FunctionAnyHash; +using FunctionSipHash64Keyed = FunctionAnyHash; using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL @@ -1713,8 +1803,10 @@ using FunctionSHA384 = FunctionStringHashFixedString; using FunctionSHA512 = FunctionStringHashFixedString; #endif using FunctionSipHash128 = FunctionAnyHash; -using FunctionSipHash128Keyed = FunctionAnyHash; +using FunctionSipHash128Keyed = FunctionAnyHash; using FunctionSipHash128Reference = FunctionAnyHash; +using FunctionSipHash128ReferenceKeyed + = FunctionAnyHash; using FunctionCityHash64 = FunctionAnyHash; using FunctionFarmFingerprint64 = FunctionAnyHash; using FunctionFarmHash64 = FunctionAnyHash; diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index 56c3c1ed00c..f56568b2508 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -20,6 +20,11 @@ REGISTER_FUNCTION(Hashing) .examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}}, .categories{"Hash"} }); + factory.registerFunction(FunctionDocumentation{ + .description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument " + "instead of using a fixed key.", + .examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}}, + .categories{"Hash"}}); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git 
a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index ca797eed856..094de0c27c2 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -336,7 +336,7 @@ private: template typename Impl> -class ExecutableFunctionJSON : public IExecutableFunction, WithContext +class ExecutableFunctionJSON : public IExecutableFunction { public: diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 6088fd52efa..55485d41ce0 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -5,19 +5,17 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include -#include #include + namespace DB { /* Determine language of Unicode UTF-8 text. diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index 8a552a30e65..a93e1d9a87d 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -1,4 +1,7 @@ #include + +#if USE_NLP + #include #include #include @@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage) } } + +#endif diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index d6873d9490e..ff8ff2d2651 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -292,8 +292,8 @@ struct SimHashImpl continue; // we need to store the new word hash value to the oldest location. - // for example, N = 5, array |a0|a1|a2|a3|a4|, now , a0 is the oldest location, - // so we need to store new word hash into location of a0, then ,this array become + // for example, N = 5, array |a0|a1|a2|a3|a4|, now, a0 is the oldest location, + // so we need to store new word hash into location of a0, then this array become // |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new // word hash value into location of a1, then array become |a5|a6|a2|a3|a4| words[offset] = BytesRef{word_start, length}; @@ -793,4 +793,3 @@ REGISTER_FUNCTION(StringHash) factory.registerFunction(); } } - diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index e39f9c63758..3de38d99c88 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -1,4 +1,7 @@ #include + +#if USE_NLP + #include #include #include @@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality) } } + +#endif diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 21c80b742fd..99bf1a7cc33 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -375,14 +375,14 @@ bool sliceHasImplAnyAllImplInt16( _mm256_or_si256( _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)), - _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))), + _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), 
_mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))), _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)), _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))), _mm256_or_si256( _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)), - _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))), + _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))), _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)), _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)))))) diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp new file mode 100644 index 00000000000..f28194781c2 --- /dev/null +++ b/src/Functions/GregorianDate.cpp @@ -0,0 +1,376 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_FORMAT_DATETIME; + extern const int LOGICAL_ERROR; +} + +namespace +{ + inline constexpr bool is_leap_year(int32_t year) + { + return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); + } + + inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + { + switch (month) + { + case 1: return 31; + case 2: return is_leap_year ? 29 : 28; + case 3: return 31; + case 4: return 30; + case 5: return 31; + case 6: return 30; + case 7: return 31; + case 8: return 31; + case 9: return 30; + case 10: return 31; + case 11: return 30; + case 12: return 31; + default: + std::terminate(); + } + } + + /** Integer division truncated toward negative infinity. + */ + template + inline constexpr I div(I x, J y) + { + const auto y_cast = static_cast(y); + if (x > 0 && y_cast < 0) + return ((x - 1) / y_cast) - 1; + else if (x < 0 && y_cast > 0) + return ((x + 1) / y_cast) - 1; + else + return x / y_cast; + } + + /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. + */ + template + inline constexpr I mod(I x, J y) + { + const auto y_cast = static_cast(y); + const auto r = x % y_cast; + if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0)) + return r == 0 ? static_cast(0) : r + y_cast; + else + return r; + } + + /** Like std::min(), but the type of operands may differ. + */ + template + inline constexpr I min(I x, J y) + { + const auto y_cast = static_cast(y); + return x < y_cast ? 
x : y_cast; + } + + inline char readDigit(ReadBuffer & in) + { + char c; + if (!in.read(c)) + throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream"); + else if (c < '0' || c > '9') + throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else"); + else + return c - '0'; + } + + inline bool tryReadDigit(ReadBuffer & in, char & c) + { + if (in.read(c) && c >= '0' && c <= '9') + { + c -= '0'; + return true; + } + + return false; + } +} + +void GregorianDate::init(ReadBuffer & in) +{ + year_ = readDigit(in) * 1000 + + readDigit(in) * 100 + + readDigit(in) * 10 + + readDigit(in); + + assertChar('-', in); + + month_ = readDigit(in) * 10 + + readDigit(in); + + assertChar('-', in); + + day_of_month_ = readDigit(in) * 10 + + readDigit(in); + + assertEOF(in); + + if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_)) + throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).", year_, month_, day_of_month_); +} + +bool GregorianDate::tryInit(ReadBuffer & in) +{ + char c[8]; + + if ( !tryReadDigit(in, c[0]) + || !tryReadDigit(in, c[1]) + || !tryReadDigit(in, c[2]) + || !tryReadDigit(in, c[3]) + || !checkChar('-', in) + || !tryReadDigit(in, c[4]) + || !tryReadDigit(in, c[5]) + || !checkChar('-', in) + || !tryReadDigit(in, c[6]) + || !tryReadDigit(in, c[7]) + || !in.eof()) + { + return false; + } + + year_ = c[0] * 1000 + c[1] * 100 + c[2] * 10 + c[3]; + month_ = c[4] * 10 + c[5]; + day_of_month_ = c[6] * 10 + c[7]; + + if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_)) + return false; + + return true; +} + +GregorianDate::GregorianDate(ReadBuffer & in) +{ + init(in); +} + +void GregorianDate::init(int64_t modified_julian_day) +{ + const OrdinalDate ord(modified_julian_day); + const MonthDay md(is_leap_year(ord.year()), ord.dayOfYear()); + + year_ = ord.year(); + month_ = md.month(); + day_of_month_ = md.dayOfMonth(); +} + +bool GregorianDate::tryInit(int64_t modified_julian_day) +{ + OrdinalDate ord; + if (!ord.tryInit(modified_julian_day)) + return false; + + MonthDay md(is_leap_year(ord.year()), ord.dayOfYear()); + + year_ = ord.year(); + month_ = md.month(); + day_of_month_ = md.dayOfMonth(); + + return true; +} + +GregorianDate::GregorianDate(int64_t modified_julian_day) +{ + init(modified_julian_day); +} + +int64_t GregorianDate::toModifiedJulianDay() const +{ + const MonthDay md(month_, day_of_month_); + + const auto day_of_year = md.dayOfYear(is_leap_year(year_)); + + const OrdinalDate ord(year_, day_of_year); + return ord.toModifiedJulianDay(); +} + +bool GregorianDate::tryToModifiedJulianDay(int64_t & res) const +{ + const MonthDay md(month_, day_of_month_); + const auto day_of_year = md.dayOfYear(is_leap_year(year_)); + OrdinalDate ord; + + if (!ord.tryInit(year_, day_of_year)) + return false; + + res = ord.toModifiedJulianDay(); + return true; +} + +template <typename ReturnType> +ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const +{ + if (year_ < 0 || year_ > 9999) + { + if constexpr (std::is_same_v<ReturnType, void>) + throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME, + "Impossible to stringify: year too big or small: {}", year_); + else + return false; + } + else + { + auto y = year_; + writeChar('0' + y / 1000, buf); y %= 1000; + writeChar('0' + y / 100, buf); y %= 100; + writeChar('0' + y / 10, buf); y %= 10; + writeChar('0' + y , buf);
+ + writeChar('-', buf); + + auto m = month_; + writeChar('0' + m / 10, buf); m %= 10; + writeChar('0' + m , buf); + + writeChar('-', buf); + + auto d = day_of_month_; + writeChar('0' + d / 10, buf); d %= 10; + writeChar('0' + d , buf); + } + + return ReturnType(true); +} + +std::string GregorianDate::toString() const +{ + WriteBufferFromOwnString buf; + write(buf); + return buf.str(); +} + +void OrdinalDate::init(int32_t year, uint16_t day_of_year) +{ + year_ = year; + day_of_year_ = day_of_year; + + if (day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", year, day_of_year); +} + +bool OrdinalDate::tryInit(int32_t year, uint16_t day_of_year) +{ + year_ = year; + day_of_year_ = day_of_year; + + return !(day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365)); +} + +void OrdinalDate::init(int64_t modified_julian_day) +{ + if (!tryInit(modified_julian_day)) + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because it's out of range"); +} + +bool OrdinalDate::tryInit(int64_t modified_julian_day) +{ + /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). + + if (modified_julian_day < -678941) + return false; + + if (modified_julian_day > 2973119) + return false; + + const auto a = modified_julian_day + 678575; + const auto quad_cent = div(a, 146097); + const auto b = mod(a, 146097); + const auto cent = min(div(b, 36524), 3); + const auto c = b - cent * 36524; + const auto quad = div(c, 1461); + const auto d = mod(c, 1461); + const auto y = min(div(d, 365), 3); + + day_of_year_ = d - y * 365 + 1; + year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1); + + return true; +} + + +OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year) +{ + init(year, day_of_year); +} + +OrdinalDate::OrdinalDate(int64_t modified_julian_day) +{ + init(modified_julian_day); +} + +int64_t OrdinalDate::toModifiedJulianDay() const noexcept +{ + const auto y = year_ - 1; + + return day_of_year_ + + 365 * y + + div(y, 4) + - div(y, 100) + + div(y, 400) + - 678576; +} + +MonthDay::MonthDay(uint8_t month, uint8_t day_of_month) + : month_(month) + , day_of_month_(day_of_month) +{ + if (month < 1 || month > 12) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", month); + /* We can't validate day_of_month here, because we don't know if + * it's a leap year. */ +} + +MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) +{ + if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}", + (is_leap_year ? "leap, " : "non-leap, "), day_of_year); + + month_ = 1; + uint16_t d = day_of_year; + while (true) + { + const auto len = monthLength(is_leap_year, month_); + if (d <= len) + break; + ++month_; + d -= len; + } + day_of_month_ = d; +} + +uint16_t MonthDay::dayOfYear(bool is_leap_year) const +{ + if (day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year, month_)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}", + (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_); + } + const auto k = month_ <= 2 ? 0 : is_leap_year ? 
-1 :-2; + return (367 * month_ - 362) / 12 + k + day_of_month_; +} + +template void GregorianDate::writeImpl(WriteBuffer & buf) const; +template bool GregorianDate::writeImpl(WriteBuffer & buf) const; + +} diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index 63bc443fa31..2528223443e 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -1,408 +1,155 @@ #pragma once -#include -#include #include -#include -#include -#include -#include - -#include namespace DB { - namespace ErrorCodes - { - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_FORMAT_DATETIME; - extern const int LOGICAL_ERROR; - } - /** Proleptic Gregorian calendar date. YearT is an integral type +class ReadBuffer; +class WriteBuffer; + +/// Proleptic Gregorian calendar date. +class GregorianDate +{ +public: + GregorianDate() {} + + void init(ReadBuffer & in); + bool tryInit(ReadBuffer & in); + + /** Construct from date in text form 'YYYY-MM-DD' by reading from + * ReadBuffer. + */ + explicit GregorianDate(ReadBuffer & in); + + void init(int64_t modified_julian_day); + bool tryInit(int64_t modified_julian_day); + + /** Construct from Modified Julian Day. The type T is an + * integral type which should be at least 32 bits wide, and + * should preferably signed. + */ + explicit GregorianDate(int64_t modified_julian_day); + + /** Convert to Modified Julian Day. The type T is an integral type * which should be at least 32 bits wide, and should preferably - * be signed. - */ - template - class GregorianDate + * signed. + */ + int64_t toModifiedJulianDay() const; + bool tryToModifiedJulianDay(int64_t & res) const; + + /** Write the date in text form 'YYYY-MM-DD' to a buffer. + */ + void write(WriteBuffer & buf) const { - public: - /** Construct from date in text form 'YYYY-MM-DD' by reading from - * ReadBuffer. - */ - explicit GregorianDate(ReadBuffer & in); + writeImpl(buf); + } - /** Construct from Modified Julian Day. The type T is an - * integral type which should be at least 32 bits wide, and - * should preferably signed. - */ - explicit GregorianDate(is_integer auto modified_julian_day); - - /** Convert to Modified Julian Day. The type T is an integral type - * which should be at least 32 bits wide, and should preferably - * signed. - */ - template - T toModifiedJulianDay() const; - - /** Write the date in text form 'YYYY-MM-DD' to a buffer. - */ - void write(WriteBuffer & buf) const; - - /** Convert to a string in text form 'YYYY-MM-DD'. - */ - std::string toString() const; - - YearT year() const noexcept - { - return year_; - } - - uint8_t month() const noexcept - { - return month_; - } - - uint8_t day_of_month() const noexcept /// NOLINT - { - return day_of_month_; - } - - private: - YearT year_; /// NOLINT - uint8_t month_; /// NOLINT - uint8_t day_of_month_; /// NOLINT - }; - - /** ISO 8601 Ordinal Date. YearT is an integral type which should - * be at least 32 bits wide, and should preferably signed. - */ - template - class OrdinalDate + bool tryWrite(WriteBuffer & buf) const { - public: - OrdinalDate(YearT year, uint16_t day_of_year); + return writeImpl(buf); + } - /** Construct from Modified Julian Day. The type T is an - * integral type which should be at least 32 bits wide, and - * should preferably signed. - */ - template - explicit OrdinalDate(DayT modified_julian_day); + /** Convert to a string in text form 'YYYY-MM-DD'. + */ + std::string toString() const; - /** Convert to Modified Julian Day. 
The type T is an integral - * type which should be at least 32 bits wide, and should - * preferably be signed. - */ - template - T toModifiedJulianDay() const noexcept; - - YearT year() const noexcept - { - return year_; - } - - uint16_t dayOfYear() const noexcept - { - return day_of_year_; - } - - private: - YearT year_; /// NOLINT - uint16_t day_of_year_; /// NOLINT - }; - - class MonthDay + int32_t year() const noexcept { - public: - /** Construct from month and day. */ - MonthDay(uint8_t month, uint8_t day_of_month); + return year_; + } - /** Construct from day of year in Gregorian or Julian - * calendars to month and day. - */ - MonthDay(bool is_leap_year, uint16_t day_of_year); + uint8_t month() const noexcept + { + return month_; + } - /** Convert month and day in Gregorian or Julian calendars to - * day of year. - */ - uint16_t dayOfYear(bool is_leap_year) const; + uint8_t dayOfMonth() const noexcept + { + return day_of_month_; + } - uint8_t month() const noexcept - { - return month_; - } +private: + int32_t year_ = 0; + uint8_t month_ = 0; + uint8_t day_of_month_ = 0; - uint8_t day_of_month() const noexcept /// NOLINT - { - return day_of_month_; - } + template + ReturnType writeImpl(WriteBuffer & buf) const; +}; - private: - uint8_t month_; /// NOLINT - uint8_t day_of_month_; /// NOLINT - }; -} - -/* Implementation */ - -namespace gd +/** ISO 8601 Ordinal Date. + */ +class OrdinalDate { - using namespace DB; +public: + OrdinalDate() {} - template - static inline constexpr bool is_leap_year(YearT year) - { - return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); - } + void init(int32_t year, uint16_t day_of_year); + bool tryInit(int32_t year, uint16_t day_of_year); - static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) - { - switch (month) - { - case 1: return 31; - case 2: return is_leap_year ? 29 : 28; - case 3: return 31; - case 4: return 30; - case 5: return 31; - case 6: return 30; - case 7: return 31; - case 8: return 31; - case 9: return 30; - case 10: return 31; - case 11: return 30; - case 12: return 31; - default: - std::terminate(); - } - } + void init(int64_t modified_julian_day); + bool tryInit(int64_t modified_julian_day); - /** Integer division truncated toward negative infinity. + OrdinalDate(int32_t year, uint16_t day_of_year); + + /** Construct from Modified Julian Day. The type T is an + * integral type which should be at least 32 bits wide, and + * should preferably signed. */ - template - static inline constexpr I div(I x, J y) - { - const auto y_cast = static_cast(y); - if (x > 0 && y_cast < 0) - return ((x - 1) / y_cast) - 1; - else if (x < 0 && y_cast > 0) - return ((x + 1) / y_cast) - 1; - else - return x / y_cast; - } + explicit OrdinalDate(int64_t modified_julian_day); - /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. + /** Convert to Modified Julian Day. The type T is an integral + * type which should be at least 32 bits wide, and should + * preferably be signed. */ - template - static inline constexpr I mod(I x, J y) + int64_t toModifiedJulianDay() const noexcept; + + int32_t year() const noexcept { - const auto y_cast = static_cast(y); - const auto r = x % y_cast; - if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0)) - return r == 0 ? static_cast(0) : r + y_cast; - else - return r; + return year_; } - /** Like std::min(), but the type of operands may differ. 
- */ - template - static inline constexpr I min(I x, J y) + uint16_t dayOfYear() const noexcept { - const auto y_cast = static_cast(y); - return x < y_cast ? x : y_cast; + return day_of_year_; } - static inline char readDigit(ReadBuffer & in) - { - char c; - if (!in.read(c)) - throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream"); - else if (c < '0' || c > '9') - throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else"); - else - return c - '0'; - } -} +private: + int32_t year_ = 0; + uint16_t day_of_year_ = 0; +}; -namespace DB +class MonthDay { - template - GregorianDate::GregorianDate(ReadBuffer & in) +public: + /** Construct from month and day. */ + MonthDay(uint8_t month, uint8_t day_of_month); + + /** Construct from day of year in Gregorian or Julian + * calendars to month and day. + */ + MonthDay(bool is_leap_year, uint16_t day_of_year); + + /** Convert month and day in Gregorian or Julian calendars to + * day of year. + */ + uint16_t dayOfYear(bool is_leap_year) const; + + uint8_t month() const noexcept { - year_ = gd::readDigit(in) * 1000 - + gd::readDigit(in) * 100 - + gd::readDigit(in) * 10 - + gd::readDigit(in); - - assertChar('-', in); - - month_ = gd::readDigit(in) * 10 - + gd::readDigit(in); - - assertChar('-', in); - - day_of_month_ = gd::readDigit(in) * 10 - + gd::readDigit(in); - - assertEOF(in); - - if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString()); + return month_; } - template - GregorianDate::GregorianDate(is_integer auto modified_julian_day) + uint8_t dayOfMonth() const noexcept { - const OrdinalDate ord(modified_julian_day); - const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear()); - year_ = ord.year(); - month_ = md.month(); - day_of_month_ = md.day_of_month(); + return day_of_month_; } - template - template - T GregorianDate::toModifiedJulianDay() const - { - const MonthDay md(month_, day_of_month_); - const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_)); - const OrdinalDate ord(year_, day_of_year); - return ord.template toModifiedJulianDay(); - } +private: + uint8_t month_ = 0; + uint8_t day_of_month_ = 0; +}; - template - void GregorianDate::write(WriteBuffer & buf) const - { - if (year_ < 0 || year_ > 9999) - { - throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME, - "Impossible to stringify: year too big or small: {}", DB::toString(year_)); - } - else - { - auto y = year_; - writeChar('0' + y / 1000, buf); y %= 1000; - writeChar('0' + y / 100, buf); y %= 100; - writeChar('0' + y / 10, buf); y %= 10; - writeChar('0' + y , buf); - - writeChar('-', buf); - - auto m = month_; - writeChar('0' + m / 10, buf); m %= 10; - writeChar('0' + m , buf); - - writeChar('-', buf); - - auto d = day_of_month_; - writeChar('0' + d / 10, buf); d %= 10; - writeChar('0' + d , buf); - } - } - - template - std::string GregorianDate::toString() const - { - WriteBufferFromOwnString buf; - write(buf); - return buf.str(); - } - - template - OrdinalDate::OrdinalDate(YearT year, uint16_t day_of_year) - : year_(year) - , day_of_year_(day_of_year) - { - if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 
366 : 365)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year)); - } - } - - template - template - OrdinalDate::OrdinalDate(DayT modified_julian_day) - { - /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). - - if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) - if (modified_julian_day < -678941) - throw Exception( - ErrorCodes::CANNOT_FORMAT_DATETIME, - "Value cannot be represented as date because it's out of range"); - - if constexpr (std::numeric_limits::max() > 2973119) - if (modified_julian_day > 2973119) - throw Exception( - ErrorCodes::CANNOT_FORMAT_DATETIME, - "Value cannot be represented as date because it's out of range"); - - const auto a = modified_julian_day + 678575; - const auto quad_cent = gd::div(a, 146097); - const auto b = gd::mod(a, 146097); - const auto cent = gd::min(gd::div(b, 36524), 3); - const auto c = b - cent * 36524; - const auto quad = gd::div(c, 1461); - const auto d = gd::mod(c, 1461); - const auto y = gd::min(gd::div(d, 365), 3); - - day_of_year_ = d - y * 365 + 1; - year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1); - } - - template - template - T OrdinalDate::toModifiedJulianDay() const noexcept - { - const auto y = year_ - 1; - return day_of_year_ - + 365 * y - + gd::div(y, 4) - - gd::div(y, 100) - + gd::div(y, 400) - - 678576; - } - - inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month) - : month_(month) - , day_of_month_(day_of_month) - { - if (month < 1 || month > 12) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month)); - /* We can't validate day_of_month here, because we don't know if - * it's a leap year. */ - } - - inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) - { - if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}", - (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year)); - - month_ = 1; - uint16_t d = day_of_year; - while (true) - { - const auto len = gd::monthLength(is_leap_year, month_); - if (d <= len) - break; - month_++; - d -= len; - } - day_of_month_ = d; - } - - inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const - { - if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}", - (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_)); - } - const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2; - return (367 * month_ - 362) / 12 + k + day_of_month_; - } } diff --git a/src/Functions/HTMLCharacterReference.generated.cpp b/src/Functions/HTMLCharacterReference.generated.cpp new file mode 100644 index 00000000000..3f9062ab9fa --- /dev/null +++ b/src/Functions/HTMLCharacterReference.generated.cpp @@ -0,0 +1,17877 @@ +/* C++ code produced by gperf version 3.1 */ +/* Command-line: /usr/bin/gperf -t --output-file=HTMLCharacterReference.generated.cpp HTMLCharacterReference.gperf */ +/* Computed positions: -k'1-8,12,14' */ + +#if !( \ + (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) && (')' == 41) \ + && ('*' == 42) && ('+' == 43) && (',' == 44) && ('-' == 45) && ('.' 
== 46) && ('/' == 47) && ('0' == 48) && ('1' == 49) && ('2' == 50) \ + && ('3' == 51) && ('4' == 52) && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) && ('9' == 57) && (':' == 58) && (';' == 59) \ + && ('<' == 60) && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) && ('N' == 78) \ + && ('O' == 79) && ('P' == 80) && ('Q' == 81) && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) && ('V' == 86) && ('W' == 87) \ + && ('X' == 88) && ('Y' == 89) && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) && ('^' == 94) && ('_' == 95) \ + && ('a' == 97) && ('b' == 98) && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) && ('g' == 103) && ('h' == 104) \ + && ('i' == 105) && ('j' == 106) && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) && ('o' == 111) && ('p' == 112) \ + && ('q' == 113) && ('r' == 114) && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) && ('w' == 119) && ('x' == 120) \ + && ('y' == 121) && ('z' == 122) && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +# error "gperf generated tables don't work with this execution character set. Please report a bug to ." +#endif + +#line 7 "HTMLCharacterReference.gperf" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma GCC diagnostic ignored "-Wunused-macros" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" +// NOLINTBEGIN(google-runtime-int,hicpp-use-nullptr,modernize-use-nullptr) +#line 16 "HTMLCharacterReference.gperf" +struct NameAndGlyph +{ + const char * name; + const char * glyph; +}; +#include + +#define TOTAL_KEYWORDS 2231 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 32 +#define MIN_HASH_VALUE 2 +#define MAX_HASH_VALUE 15511 +/* maximum key range = 15510, duplicates = 0 */ + +class HTMLCharacterHash +{ +private: + static inline unsigned int hash(const char * str, size_t len); + +public: + static const struct NameAndGlyph * Lookup(const char * str, size_t len); +}; + +inline unsigned int HTMLCharacterHash::hash(const char * str, size_t len) +{ + static const unsigned short asso_values[] + = {15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 0, 60, + 15, 20, 25, 15512, 90, 280, 0, 0, 0, 15512, 5, 3060, 3035, 30, 230, 2900, 1985, 3425, + 320, 185, 3555, 0, 420, 1685, 970, 1835, 1850, 430, 745, 210, 770, 205, 590, 480, 1595, 290, + 350, 900, 3370, 1240, 90, 730, 545, 1210, 30, 1340, 1135, 500, 250, 645, 190, 2210, 820, 3260, + 2230, 3545, 20, 145, 15, 50, 10, 100, 0, 55, 220, 25, 2440, 5, 1570, 610, 3951, 4666, + 320, 3633, 3130, 2755, 3874, 120, 110, 755, 1430, 1250, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 
15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, + 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512, 15512}; + unsigned int hval = len; + + switch (hval) + { + default: + hval += asso_values[static_cast(str[13])]; + /*FALLTHROUGH*/ + case 13: + case 12: + hval += asso_values[static_cast(str[11])]; + /*FALLTHROUGH*/ + case 11: + case 10: + case 9: + case 8: + hval += asso_values[static_cast(str[7])]; + /*FALLTHROUGH*/ + case 7: + hval += asso_values[static_cast(str[6] + 1)]; + /*FALLTHROUGH*/ + case 6: + hval += asso_values[static_cast(str[5] + 2)]; + /*FALLTHROUGH*/ + case 5: + hval += asso_values[static_cast(str[4] + 3)]; + /*FALLTHROUGH*/ + case 4: + hval += asso_values[static_cast(str[3] + 5)]; + /*FALLTHROUGH*/ + case 3: + hval += asso_values[static_cast(str[2] + 1)]; + /*FALLTHROUGH*/ + case 2: + hval += asso_values[static_cast(str[1])]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[static_cast(str[0] + 13)]; + break; + } + return hval; +} + +const struct NameAndGlyph * HTMLCharacterHash::Lookup(const char * str, size_t len) +{ + static const struct NameAndGlyph wordlist[] + = {{""}, + {""}, +#line 1155 "HTMLCharacterReference.gperf" + {"gt", ">"}, +#line 1156 "HTMLCharacterReference.gperf" + {"gt;", ">"}, + {""}, + {""}, + {""}, +#line 1410 "HTMLCharacterReference.gperf" + {"lt", "<"}, +#line 1411 "HTMLCharacterReference.gperf" + {"lt;", "<"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 704 "HTMLCharacterReference.gperf" + {"ap;", "≈"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1398 "HTMLCharacterReference.gperf" + {"lrm;", "‎"}, + {""}, + {""}, + {""}, + {""}, +#line 1062 "HTMLCharacterReference.gperf" + {"eta;", "η"}, +#line 1044 "HTMLCharacterReference.gperf" + {"epsi;", "ε"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1046 "HTMLCharacterReference.gperf" + {"epsiv;", "ϵ"}, + {""}, + {""}, + {""}, + {""}, +#line 1148 "HTMLCharacterReference.gperf" + {"gnsim;", "⋧"}, + {""}, + {""}, + {""}, + {""}, +#line 1373 "HTMLCharacterReference.gperf" + {"lnsim;", "⋦"}, + {""}, + {""}, + {""}, +#line 601 "HTMLCharacterReference.gperf" + {"Upsi;", "ϒ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1392 "HTMLCharacterReference.gperf" + {"lpar;", "("}, + {""}, + {""}, + {""}, + {""}, +#line 1041 "HTMLCharacterReference.gperf" + {"epar;", "⋕"}, + {""}, + {""}, + {""}, + {""}, +#line 1038 "HTMLCharacterReference.gperf" + {"ensp;", " "}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1049 "HTMLCharacterReference.gperf" + {"eqsim;", "≂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1143 "HTMLCharacterReference.gperf" + {"gnap;", "⪊"}, + {""}, + {""}, + {""}, + {""}, +#line 1368 "HTMLCharacterReference.gperf" + {"lnap;", "⪉"}, + {""}, + {""}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, +#line 2198 "HTMLCharacterReference.gperf" + {"wr;", "≀"}, + {""}, + {""}, + {""}, + {""}, +#line 2197 "HTMLCharacterReference.gperf" + {"wp;", "℘"}, +#line 917 "HTMLCharacterReference.gperf" + {"cup;", "∪"}, +#line 1420 "HTMLCharacterReference.gperf" + {"ltri;", "◃"}, +#line 1394 "HTMLCharacterReference.gperf" + {"lrarr;", "⇆"}, + {""}, + {""}, + {""}, + {""}, +#line 1058 "HTMLCharacterReference.gperf" + {"erarr;", "⥱"}, + {""}, + {""}, +#line 1065 "HTMLCharacterReference.gperf" + {"euml", "ë"}, +#line 1066 "HTMLCharacterReference.gperf" + {"euml;", "ë"}, +#line 903 "HTMLCharacterReference.gperf" + {"crarr;", "↵"}, + {""}, + {""}, + {""}, +#line 1179 "HTMLCharacterReference.gperf" + {"hbar;", "ℏ"}, + {""}, + {""}, + {""}, +#line 720 "HTMLCharacterReference.gperf" + {"auml", "ä"}, +#line 721 "HTMLCharacterReference.gperf" + {"auml;", "ä"}, +#line 1303 "HTMLCharacterReference.gperf" + {"lbarr;", "⤌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 606 "HTMLCharacterReference.gperf" + {"Uuml", "Ü"}, +#line 607 "HTMLCharacterReference.gperf" + {"Uuml;", "Ü"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1067 "HTMLCharacterReference.gperf" + {"euro;", "€"}, + {""}, + {""}, + {""}, + {""}, +#line 998 "HTMLCharacterReference.gperf" + {"dtri;", "▿"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 922 "HTMLCharacterReference.gperf" + {"cupor;", "⩅"}, + {""}, + {""}, +#line 715 "HTMLCharacterReference.gperf" + {"ast;", "*"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 774 "HTMLCharacterReference.gperf" + {"bnot;", "⌐"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 995 "HTMLCharacterReference.gperf" + {"dsol;", "⧶"}, +#line 1000 "HTMLCharacterReference.gperf" + {"duarr;", "⇵"}, + {""}, +#line 1250 "HTMLCharacterReference.gperf" + {"it;", "⁢"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1037 "HTMLCharacterReference.gperf" + {"eng;", "ŋ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 684 "HTMLCharacterReference.gperf" + {"ang;", "∠"}, +#line 891 "HTMLCharacterReference.gperf" + {"comp;", "∁"}, + {""}, + {""}, +#line 1225 "HTMLCharacterReference.gperf" + {"in;", "∈"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 979 "HTMLCharacterReference.gperf" + {"dot;", "˙"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1006 "HTMLCharacterReference.gperf" + {"eDot;", "≑"}, +#line 1375 "HTMLCharacterReference.gperf" + {"loarr;", "⇽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 668 "HTMLCharacterReference.gperf" + {"af;", "⁡"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1145 "HTMLCharacterReference.gperf" + {"gne;", "⪈"}, +#line 836 "HTMLCharacterReference.gperf" + {"bump;", "≎"}, + {""}, + {""}, + {""}, +#line 1370 "HTMLCharacterReference.gperf" + {"lne;", "⪇"}, + {""}, +#line 696 "HTMLCharacterReference.gperf" + {"angrt;", "∟"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 325 "HTMLCharacterReference.gperf" + {"Lt;", "≪"}, +#line 707 "HTMLCharacterReference.gperf" + {"ape;", "≊"}, +#line 733 "HTMLCharacterReference.gperf" + {"bbrk;", "⎵"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1230 "HTMLCharacterReference.gperf" + {"int;", "∫"}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, +#line 1054 "HTMLCharacterReference.gperf" + {"equiv;", "≡"}, + {""}, + {""}, + {""}, +#line 831 "HTMLCharacterReference.gperf" + {"bsol;", "\\"}, +#line 1188 "HTMLCharacterReference.gperf" + {"hoarr;", "⇿"}, + {""}, + {""}, + {""}, + {""}, +#line 1421 "HTMLCharacterReference.gperf" + {"ltrie;", "⊴"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1042 "HTMLCharacterReference.gperf" + {"eparsl;", "⧣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1090 "HTMLCharacterReference.gperf" + {"frac12", "½"}, +#line 1091 "HTMLCharacterReference.gperf" + {"frac12;", "½"}, + {""}, + {""}, + {""}, +#line 2182 "HTMLCharacterReference.gperf" + {"vprop;", "∝"}, + {""}, + {""}, + {""}, + {""}, +#line 1307 "HTMLCharacterReference.gperf" + {"lbrke;", "⦋"}, + {""}, + {""}, +#line 1253 "HTMLCharacterReference.gperf" + {"iuml", "ï"}, +#line 1254 "HTMLCharacterReference.gperf" + {"iuml;", "ï"}, +#line 1093 "HTMLCharacterReference.gperf" + {"frac14", "¼"}, +#line 1094 "HTMLCharacterReference.gperf" + {"frac14;", "¼"}, + {""}, +#line 776 "HTMLCharacterReference.gperf" + {"bot;", "⊥"}, + {""}, +#line 961 "HTMLCharacterReference.gperf" + {"dharr;", "⇂"}, +#line 1095 "HTMLCharacterReference.gperf" + {"frac15;", "⅕"}, + {""}, +#line 1133 "HTMLCharacterReference.gperf" + {"gfr;", "𝔤"}, + {""}, + {""}, +#line 1096 "HTMLCharacterReference.gperf" + {"frac16;", "⅙"}, + {""}, +#line 1351 "HTMLCharacterReference.gperf" + {"lfr;", "𝔩"}, +#line 1087 "HTMLCharacterReference.gperf" + {"fork;", "⋔"}, +#line 1100 "HTMLCharacterReference.gperf" + {"frac34", "¾"}, +#line 1101 "HTMLCharacterReference.gperf" + {"frac34;", "¾"}, + {""}, +#line 1019 "HTMLCharacterReference.gperf" + {"efr;", "𝔢"}, + {""}, +#line 1088 "HTMLCharacterReference.gperf" + {"forkv;", "⫙"}, +#line 1102 "HTMLCharacterReference.gperf" + {"frac35;", "⅗"}, + {""}, +#line 864 "HTMLCharacterReference.gperf" + {"cfr;", "𝔠"}, + {""}, + {""}, +#line 1104 "HTMLCharacterReference.gperf" + {"frac45;", "⅘"}, + {""}, +#line 669 "HTMLCharacterReference.gperf" + {"afr;", "𝔞"}, +#line 644 "HTMLCharacterReference.gperf" + {"Yuml;", "Ÿ"}, + {""}, + {""}, + {""}, +#line 1257 "HTMLCharacterReference.gperf" + {"jfr;", "𝔧"}, +#line 1279 "HTMLCharacterReference.gperf" + {"lHar;", "⥢"}, + {""}, +#line 1105 "HTMLCharacterReference.gperf" + {"frac56;", "⅚"}, + {""}, +#line 578 "HTMLCharacterReference.gperf" + {"Ufr;", "𝔘"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 908 "HTMLCharacterReference.gperf" + {"csup;", "⫐"}, + {""}, +#line 1092 "HTMLCharacterReference.gperf" + {"frac13;", "⅓"}, + {""}, +#line 1774 "HTMLCharacterReference.gperf" + {"quot", "\""}, +#line 1775 "HTMLCharacterReference.gperf" + {"quot;", "\""}, +#line 1039 "HTMLCharacterReference.gperf" + {"eogon;", "ę"}, + {""}, + {""}, + {""}, + {""}, +#line 930 "HTMLCharacterReference.gperf" + {"curren", "¤"}, +#line 931 "HTMLCharacterReference.gperf" + {"curren;", "¤"}, +#line 334 "HTMLCharacterReference.gperf" + {"Mu;", "Μ"}, +#line 959 "HTMLCharacterReference.gperf" + {"dfr;", "𝔡"}, + {""}, +#line 702 "HTMLCharacterReference.gperf" + {"aogon;", "ą"}, +#line 1163 "HTMLCharacterReference.gperf" + {"gtrarr;", "⥸"}, + {""}, +#line 1185 "HTMLCharacterReference.gperf" + {"hfr;", "𝔥"}, + {""}, + {""}, +#line 1099 "HTMLCharacterReference.gperf" + {"frac25;", "⅖"}, + {""}, + {""}, + {""}, +#line 588 "HTMLCharacterReference.gperf" + {"Uogon;", "Ų"}, + {""}, + {""}, +#line 772 "HTMLCharacterReference.gperf" + {"bne;", "=⃥"}, + {""}, + {""}, +#line 1097 "HTMLCharacterReference.gperf" + 
{"frac18;", "⅛"}, + {""}, + {""}, +#line 940 "HTMLCharacterReference.gperf" + {"dHar;", "⥥"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 913 "HTMLCharacterReference.gperf" + {"cuepr;", "⋞"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1103 "HTMLCharacterReference.gperf" + {"frac38;", "⅜"}, + {""}, + {""}, + {""}, +#line 960 "HTMLCharacterReference.gperf" + {"dharl;", "⇃"}, +#line 1393 "HTMLCharacterReference.gperf" + {"lparlt;", "⦓"}, + {""}, +#line 457 "HTMLCharacterReference.gperf" + {"Qfr;", "𝔔"}, + {""}, + {""}, +#line 1106 "HTMLCharacterReference.gperf" + {"frac58;", "⅝"}, + {""}, + {""}, + {""}, + {""}, +#line 1098 "HTMLCharacterReference.gperf" + {"frac23;", "⅔"}, + {""}, +#line 1078 "HTMLCharacterReference.gperf" + {"ffr;", "𝔣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2195 "HTMLCharacterReference.gperf" + {"wfr;", "𝔴"}, + {""}, +#line 838 "HTMLCharacterReference.gperf" + {"bumpe;", "≏"}, + {""}, + {""}, + {""}, + {""}, +#line 686 "HTMLCharacterReference.gperf" + {"angle;", "∠"}, + {""}, + {""}, +#line 2177 "HTMLCharacterReference.gperf" + {"vfr;", "𝔳"}, + {""}, + {""}, +#line 924 "HTMLCharacterReference.gperf" + {"curarr;", "↷"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1146 "HTMLCharacterReference.gperf" + {"gneq;", "⪈"}, +#line 1147 "HTMLCharacterReference.gperf" + {"gneqq;", "≩"}, + {""}, + {""}, + {""}, +#line 1371 "HTMLCharacterReference.gperf" + {"lneq;", "⪇"}, +#line 1372 "HTMLCharacterReference.gperf" + {"lneqq;", "≨"}, +#line 899 "HTMLCharacterReference.gperf" + {"coprod;", "∐"}, +#line 1121 "HTMLCharacterReference.gperf" + {"ge;", "≥"}, +#line 746 "HTMLCharacterReference.gperf" + {"bfr;", "𝔟"}, + {""}, + {""}, + {""}, +#line 1321 "HTMLCharacterReference.gperf" + {"le;", "≤"}, +#line 1126 "HTMLCharacterReference.gperf" + {"ges;", "⩾"}, + {""}, +#line 1383 "HTMLCharacterReference.gperf" + {"lopar;", "⦅"}, +#line 777 "HTMLCharacterReference.gperf" + {"bottom;", "⊥"}, +#line 1017 "HTMLCharacterReference.gperf" + {"ee;", "ⅇ"}, +#line 1336 "HTMLCharacterReference.gperf" + {"les;", "⩽"}, + {""}, + {""}, +#line 1107 "HTMLCharacterReference.gperf" + {"frac78;", "⅞"}, + {""}, +#line 1123 "HTMLCharacterReference.gperf" + {"geq;", "≥"}, + {""}, + {""}, + {""}, + {""}, +#line 1333 "HTMLCharacterReference.gperf" + {"leq;", "≤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1159 "HTMLCharacterReference.gperf" + {"gtdot;", "⋗"}, + {""}, + {""}, +#line 900 "HTMLCharacterReference.gperf" + {"copy", "©"}, +#line 901 "HTMLCharacterReference.gperf" + {"copy;", "©"}, +#line 1414 "HTMLCharacterReference.gperf" + {"ltdot;", "⋖"}, + {""}, + {""}, + {""}, + {""}, +#line 782 "HTMLCharacterReference.gperf" + {"boxDr;", "╓"}, + {""}, + {""}, + {""}, + {""}, +#line 910 "HTMLCharacterReference.gperf" + {"ctdot;", "⋯"}, + {""}, + {""}, +#line 679 "HTMLCharacterReference.gperf" + {"and;", "∧"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1131 "HTMLCharacterReference.gperf" + {"gesl;", "⋛︀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 214 "HTMLCharacterReference.gperf" + {"Hfr;", "ℌ"}, + {""}, + {""}, + {""}, + {""}, +#line 181 "HTMLCharacterReference.gperf" + {"Ffr;", "𝔉"}, + {""}, + {""}, +#line 839 "HTMLCharacterReference.gperf" + {"bumpeq;", "≏"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1209 "HTMLCharacterReference.gperf" + {"ifr;", "𝔦"}, + {""}, +#line 997 "HTMLCharacterReference.gperf" + {"dtdot;", "⋱"}, + {""}, + {""}, + {""}, + {""}, 
+#line 909 "HTMLCharacterReference.gperf" + {"csupe;", "⫒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 837 "HTMLCharacterReference.gperf" + {"bumpE;", "⪮"}, +#line 897 "HTMLCharacterReference.gperf" + {"conint;", "∮"}, + {""}, + {""}, +#line 531 "HTMLCharacterReference.gperf" + {"Star;", "⋆"}, + {""}, + {""}, + {""}, +#line 641 "HTMLCharacterReference.gperf" + {"Yfr;", "𝔜"}, + {""}, +#line 1237 "HTMLCharacterReference.gperf" + {"iogon;", "į"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 540 "HTMLCharacterReference.gperf" + {"Sum;", "∑"}, + {""}, +#line 781 "HTMLCharacterReference.gperf" + {"boxDl;", "╖"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 117 "HTMLCharacterReference.gperf" + {"Dot;", "¨"}, + {""}, +#line 1060 "HTMLCharacterReference.gperf" + {"esdot;", "≐"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1134 "HTMLCharacterReference.gperf" + {"gg;", "≫"}, +#line 309 "HTMLCharacterReference.gperf" + {"Lfr;", "𝔏"}, + {""}, + {""}, + {""}, +#line 1352 "HTMLCharacterReference.gperf" + {"lg;", "≶"}, + {""}, + {""}, + {""}, + {""}, +#line 1020 "HTMLCharacterReference.gperf" + {"eg;", "⪚"}, +#line 541 "HTMLCharacterReference.gperf" + {"Sup;", "⋑"}, + {""}, + {""}, + {""}, +#line 96 "HTMLCharacterReference.gperf" + {"DD;", "ⅅ"}, +#line 1023 "HTMLCharacterReference.gperf" + {"egs;", "⪖"}, + {""}, + {""}, + {""}, +#line 1612 "HTMLCharacterReference.gperf" + {"nu;", "ν"}, +#line 861 "HTMLCharacterReference.gperf" + {"cent", "¢"}, +#line 862 "HTMLCharacterReference.gperf" + {"cent;", "¢"}, +#line 866 "HTMLCharacterReference.gperf" + {"check;", "✓"}, + {""}, +#line 1045 "HTMLCharacterReference.gperf" + {"epsilon;", "ε"}, +#line 238 "HTMLCharacterReference.gperf" + {"Int;", "∬"}, +#line 1239 "HTMLCharacterReference.gperf" + {"iota;", "ι"}, + {""}, + {""}, + {""}, +#line 1765 "HTMLCharacterReference.gperf" + {"qfr;", "𝔮"}, + {""}, + {""}, + {""}, +#line 1167 "HTMLCharacterReference.gperf" + {"gtrless;", "≷"}, + {""}, +#line 1560 "HTMLCharacterReference.gperf" + {"npar;", "∦"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 780 "HTMLCharacterReference.gperf" + {"boxDR;", "╔"}, + {""}, +#line 602 "HTMLCharacterReference.gperf" + {"Upsilon;", "Υ"}, +#line 1613 "HTMLCharacterReference.gperf" + {"num;", "#"}, + {""}, + {""}, +#line 1181 "HTMLCharacterReference.gperf" + {"hearts;", "♥"}, + {""}, +#line 1489 "HTMLCharacterReference.gperf" + {"nbsp", " "}, +#line 1490 "HTMLCharacterReference.gperf" + {"nbsp;", " "}, + {""}, + {""}, + {""}, +#line 1332 "HTMLCharacterReference.gperf" + {"leg;", "⋚"}, +#line 522 "HTMLCharacterReference.gperf" + {"Sqrt;", "√"}, +#line 791 "HTMLCharacterReference.gperf" + {"boxUr;", "╙"}, + {""}, + {""}, +#line 330 "HTMLCharacterReference.gperf" + {"Mfr;", "𝔐"}, + {""}, +#line 1563 "HTMLCharacterReference.gperf" + {"npart;", "∂̸"}, + {""}, + {""}, + {""}, +#line 1162 "HTMLCharacterReference.gperf" + {"gtrapprox;", "⪆"}, + {""}, +#line 687 "HTMLCharacterReference.gperf" + {"angmsd;", "∡"}, + {""}, +#line 249 "HTMLCharacterReference.gperf" + {"Iuml", "Ï"}, +#line 250 "HTMLCharacterReference.gperf" + {"Iuml;", "Ï"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2176 "HTMLCharacterReference.gperf" + {"vert;", "|"}, + {""}, + {""}, + {""}, +#line 1565 "HTMLCharacterReference.gperf" + {"npr;", "⊀"}, +#line 1124 "HTMLCharacterReference.gperf" + 
{"geqq;", "≧"}, +#line 1571 "HTMLCharacterReference.gperf" + {"nrarr;", "↛"}, +#line 1052 "HTMLCharacterReference.gperf" + {"equals;", "="}, + {""}, + {""}, +#line 1334 "HTMLCharacterReference.gperf" + {"leqq;", "≦"}, + {""}, +#line 1573 "HTMLCharacterReference.gperf" + {"nrarrw;", "↝̸"}, +#line 954 "HTMLCharacterReference.gperf" + {"deg", "°"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 955 "HTMLCharacterReference.gperf" + {"deg;", "°"}, + {""}, +#line 1632 "HTMLCharacterReference.gperf" + {"nwarr;", "↖"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 902 "HTMLCharacterReference.gperf" + {"copysr;", "℗"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 983 "HTMLCharacterReference.gperf" + {"dotplus;", "∔"}, + {""}, +#line 1406 "HTMLCharacterReference.gperf" + {"lsqb;", "["}, + {""}, +#line 1086 "HTMLCharacterReference.gperf" + {"forall;", "∀"}, + {""}, +#line 1389 "HTMLCharacterReference.gperf" + {"loz;", "◊"}, + {""}, + {""}, + {""}, +#line 209 "HTMLCharacterReference.gperf" + {"Gt;", "≫"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 689 "HTMLCharacterReference.gperf" + {"angmsdab;", "⦩"}, + {""}, + {""}, + {""}, +#line 925 "HTMLCharacterReference.gperf" + {"curarrm;", "⤼"}, +#line 175 "HTMLCharacterReference.gperf" + {"Eta;", "Η"}, + {""}, + {""}, + {""}, + {""}, +#line 108 "HTMLCharacterReference.gperf" + {"Dfr;", "𝔇"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 790 "HTMLCharacterReference.gperf" + {"boxUl;", "╜"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1132 "HTMLCharacterReference.gperf" + {"gesles;", "⪔"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 810 "HTMLCharacterReference.gperf" + {"boxplus;", "⊞"}, + {""}, + {""}, + {""}, + {""}, +#line 1548 "HTMLCharacterReference.gperf" + {"not", "¬"}, + {""}, + {""}, +#line 832 "HTMLCharacterReference.gperf" + {"bsolb;", "⧅"}, + {""}, + {""}, +#line 1549 "HTMLCharacterReference.gperf" + {"not;", "¬"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 253 "HTMLCharacterReference.gperf" + {"Jfr;", "𝔍"}, + {""}, + {""}, + {""}, + {""}, +#line 1135 "HTMLCharacterReference.gperf" + {"ggg;", "⋙"}, +#line 1169 "HTMLCharacterReference.gperf" + {"gvertneqq;", "≩︀"}, +#line 1150 "HTMLCharacterReference.gperf" + {"grave;", "`"}, + {""}, + {""}, + {""}, +#line 1425 "HTMLCharacterReference.gperf" + {"lvertneqq;", "≨︀"}, + {""}, + {""}, + {""}, + {""}, +#line 1604 "HTMLCharacterReference.gperf" + {"ntgl;", "≹"}, +#line 789 "HTMLCharacterReference.gperf" + {"boxUR;", "╚"}, + {""}, + {""}, +#line 630 "HTMLCharacterReference.gperf" + {"Xfr;", "𝔛"}, +#line 867 "HTMLCharacterReference.gperf" + {"checkmark;", "✓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1629 "HTMLCharacterReference.gperf" + {"nvsim;", "∼⃒"}, + {""}, + {""}, +#line 176 "HTMLCharacterReference.gperf" + {"Euml", "Ë"}, +#line 177 "HTMLCharacterReference.gperf" + {"Euml;", "Ë"}, + {""}, +#line 1184 "HTMLCharacterReference.gperf" + {"hercon;", "⊹"}, + {""}, +#line 2171 "HTMLCharacterReference.gperf" + {"vee;", "∨"}, + {""}, +#line 2218 "HTMLCharacterReference.gperf" + {"xrarr;", "⟶"}, + {""}, + {""}, + {""}, + {""}, +#line 1550 "HTMLCharacterReference.gperf" + {"notin;", "∉"}, +#line 742 "HTMLCharacterReference.gperf" + {"bernou;", "ℬ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1309 "HTMLCharacterReference.gperf" + {"lbrkslu;", "⦍"}, + {""}, + {""}, +#line 1354 
"HTMLCharacterReference.gperf" + {"lhard;", "↽"}, + {""}, + {""}, +#line 514 "HTMLCharacterReference.gperf" + {"Sfr;", "𝔖"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 816 "HTMLCharacterReference.gperf" + {"boxv;", "│"}, +#line 817 "HTMLCharacterReference.gperf" + {"boxvH;", "╪"}, + {""}, + {""}, + {""}, + {""}, +#line 1523 "HTMLCharacterReference.gperf" + {"nharr;", "↮"}, + {""}, + {""}, + {""}, +#line 1618 "HTMLCharacterReference.gperf" + {"nvap;", "≍⃒"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 586 "HTMLCharacterReference.gperf" + {"Union;", "⋃"}, +#line 1562 "HTMLCharacterReference.gperf" + {"nparsl;", "⫽⃥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1152 "HTMLCharacterReference.gperf" + {"gsim;", "≳"}, + {""}, + {""}, + {""}, +#line 695 "HTMLCharacterReference.gperf" + {"angmsdah;", "⦯"}, +#line 1403 "HTMLCharacterReference.gperf" + {"lsim;", "≲"}, + {""}, + {""}, + {""}, +#line 475 "HTMLCharacterReference.gperf" + {"Rho;", "Ρ"}, +#line 1061 "HTMLCharacterReference.gperf" + {"esim;", "≂"}, +#line 1127 "HTMLCharacterReference.gperf" + {"gescc;", "⪩"}, +#line 823 "HTMLCharacterReference.gperf" + {"bprime;", "‵"}, + {""}, +#line 231 "HTMLCharacterReference.gperf" + {"Ifr;", "ℑ"}, + {""}, +#line 1337 "HTMLCharacterReference.gperf" + {"lescc;", "⪨"}, + {""}, + {""}, + {""}, +#line 743 "HTMLCharacterReference.gperf" + {"beta;", "β"}, + {""}, + {""}, +#line 406 "HTMLCharacterReference.gperf" + {"Nu;", "Ν"}, + {""}, + {""}, +#line 1227 "HTMLCharacterReference.gperf" + {"infin;", "∞"}, + {""}, + {""}, + {""}, + {""}, +#line 822 "HTMLCharacterReference.gperf" + {"boxvr;", "├"}, + {""}, + {""}, +#line 1512 "HTMLCharacterReference.gperf" + {"nfr;", "𝔫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1491 "HTMLCharacterReference.gperf" + {"nbump;", "≎̸"}, + {""}, + {""}, + {""}, + {""}, +#line 243 "HTMLCharacterReference.gperf" + {"Iogon;", "Į"}, + {""}, + {""}, + {""}, +#line 906 "HTMLCharacterReference.gperf" + {"csub;", "⫏"}, +#line 1240 "HTMLCharacterReference.gperf" + {"iprod;", "⨼"}, + {""}, + {""}, + {""}, +#line 1598 "HTMLCharacterReference.gperf" + {"nsup;", "⊅"}, +#line 937 "HTMLCharacterReference.gperf" + {"cwint;", "∱"}, + {""}, + {""}, + {""}, + {""}, +#line 723 "HTMLCharacterReference.gperf" + {"awint;", "⨑"}, + {""}, + {""}, +#line 693 "HTMLCharacterReference.gperf" + {"angmsdaf;", "⦭"}, + {""}, + {""}, + {""}, + {""}, +#line 652 "HTMLCharacterReference.gperf" + {"Zfr;", "ℨ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1621 "HTMLCharacterReference.gperf" + {"nvgt;", ">⃒"}, + {""}, + {""}, + {""}, +#line 868 "HTMLCharacterReference.gperf" + {"chi;", "χ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 245 "HTMLCharacterReference.gperf" + {"Iota;", "Ι"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 118 "HTMLCharacterReference.gperf" + {"DotDot;", "⃜"}, + {""}, + {""}, +#line 685 "HTMLCharacterReference.gperf" + {"ange;", "⦤"}, +#line 821 "HTMLCharacterReference.gperf" + {"boxvl;", "┤"}, + {""}, + {""}, + {""}, + {""}, +#line 2207 "HTMLCharacterReference.gperf" + {"xharr;", "⟷"}, +#line 267 "HTMLCharacterReference.gperf" + {"LT", "<"}, +#line 268 "HTMLCharacterReference.gperf" + {"LT;", "<"}, + {""}, +#line 1206 "HTMLCharacterReference.gperf" + {"iexcl", "¡"}, +#line 1207 "HTMLCharacterReference.gperf" + {"iexcl;", "¡"}, + {""}, + {""}, 
+ {""}, + {""}, +#line 1587 "HTMLCharacterReference.gperf" + {"nspar;", "∦"}, + {""}, + {""}, + {""}, + {""}, +#line 980 "HTMLCharacterReference.gperf" + {"doteq;", "≐"}, + {""}, + {""}, + {""}, +#line 829 "HTMLCharacterReference.gperf" + {"bsim;", "∽"}, +#line 1154 "HTMLCharacterReference.gperf" + {"gsiml;", "⪐"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1416 "HTMLCharacterReference.gperf" + {"ltimes;", "⋉"}, + {""}, +#line 474 "HTMLCharacterReference.gperf" + {"Rfr;", "ℜ"}, + {""}, + {""}, + {""}, + {""}, +#line 1473 "HTMLCharacterReference.gperf" + {"nLt;", "≪⃒"}, + {""}, + {""}, +#line 680 "HTMLCharacterReference.gperf" + {"andand;", "⩕"}, + {""}, +#line 46 "HTMLCharacterReference.gperf" + {"Auml", "Ä"}, +#line 47 "HTMLCharacterReference.gperf" + {"Auml;", "Ä"}, + {""}, + {""}, + {""}, +#line 160 "HTMLCharacterReference.gperf" + {"Efr;", "𝔈"}, + {""}, +#line 1374 "HTMLCharacterReference.gperf" + {"loang;", "⟬"}, + {""}, + {""}, +#line 351 "HTMLCharacterReference.gperf" + {"Not;", "⫬"}, + {""}, +#line 934 "HTMLCharacterReference.gperf" + {"cuvee;", "⋎"}, + {""}, +#line 1501 "HTMLCharacterReference.gperf" + {"ne;", "≠"}, +#line 2205 "HTMLCharacterReference.gperf" + {"xfr;", "𝔵"}, + {""}, +#line 819 "HTMLCharacterReference.gperf" + {"boxvR;", "╞"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1768 "HTMLCharacterReference.gperf" + {"qprime;", "⁗"}, + {""}, +#line 1208 "HTMLCharacterReference.gperf" + {"iff;", "⇔"}, + {""}, +#line 1153 "HTMLCharacterReference.gperf" + {"gsime;", "⪎"}, + {""}, + {""}, +#line 198 "HTMLCharacterReference.gperf" + {"Gfr;", "𝔊"}, + {""}, +#line 1404 "HTMLCharacterReference.gperf" + {"lsime;", "⪍"}, + {""}, + {""}, + {""}, + {""}, +#line 167 "HTMLCharacterReference.gperf" + {"Eogon;", "Ę"}, + {""}, + {""}, + {""}, +#line 724 "HTMLCharacterReference.gperf" + {"bNot;", "⫭"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1509 "HTMLCharacterReference.gperf" + {"nesim;", "≂̸"}, + {""}, + {""}, +#line 1552 "HTMLCharacterReference.gperf" + {"notindot;", "⋵̸"}, +#line 683 "HTMLCharacterReference.gperf" + {"andv;", "⩚"}, + {""}, + {""}, + {""}, + {""}, +#line 1120 "HTMLCharacterReference.gperf" + {"gdot;", "ġ"}, +#line 1524 "HTMLCharacterReference.gperf" + {"nhpar;", "⫲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1016 "HTMLCharacterReference.gperf" + {"edot;", "ė"}, + {""}, + {""}, +#line 1386 "HTMLCharacterReference.gperf" + {"lotimes;", "⨴"}, + {""}, +#line 857 "HTMLCharacterReference.gperf" + {"cdot;", "ċ"}, +#line 907 "HTMLCharacterReference.gperf" + {"csube;", "⫑"}, + {""}, +#line 1308 "HTMLCharacterReference.gperf" + {"lbrksld;", "⦏"}, + {""}, + {""}, +#line 1600 "HTMLCharacterReference.gperf" + {"nsupe;", "⊉"}, + {""}, + {""}, + {""}, +#line 858 "HTMLCharacterReference.gperf" + {"cedil", "¸"}, +#line 859 "HTMLCharacterReference.gperf" + {"cedil;", "¸"}, + {""}, +#line 950 "HTMLCharacterReference.gperf" + {"dd;", "ⅆ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2188 "HTMLCharacterReference.gperf" + {"vsupne;", "⊋︀"}, + {""}, + {""}, +#line 792 "HTMLCharacterReference.gperf" + {"boxV;", "║"}, +#line 793 "HTMLCharacterReference.gperf" + {"boxVH;", "╬"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 824 "HTMLCharacterReference.gperf" + {"breve;", "˘"}, + {""}, + {""}, + {""}, + {""}, +#line 1158 "HTMLCharacterReference.gperf" + {"gtcir;", "⩺"}, + {""}, + {""}, + {""}, + {""}, +#line 1413 "HTMLCharacterReference.gperf" + {"ltcir;", 
"⩹"}, + {""}, + {""}, + {""}, + {""}, +#line 1504 "HTMLCharacterReference.gperf" + {"nearr;", "↗"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 470 "HTMLCharacterReference.gperf" + {"Re;", "ℜ"}, + {""}, + {""}, + {""}, +#line 1572 "HTMLCharacterReference.gperf" + {"nrarrc;", "⤳̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1235 "HTMLCharacterReference.gperf" + {"intprod;", "⨼"}, + {""}, +#line 1244 "HTMLCharacterReference.gperf" + {"isin;", "∈"}, + {""}, +#line 992 "HTMLCharacterReference.gperf" + {"drcrop;", "⌌"}, + {""}, +#line 692 "HTMLCharacterReference.gperf" + {"angmsdae;", "⦬"}, + {""}, +#line 1249 "HTMLCharacterReference.gperf" + {"isinv;", "∈"}, + {""}, +#line 444 "HTMLCharacterReference.gperf" + {"Pr;", "⪻"}, + {""}, + {""}, +#line 798 "HTMLCharacterReference.gperf" + {"boxVr;", "╟"}, + {""}, + {""}, +#line 347 "HTMLCharacterReference.gperf" + {"Nfr;", "𝔑"}, + {""}, + {""}, + {""}, +#line 912 "HTMLCharacterReference.gperf" + {"cudarrr;", "⤵"}, + {""}, +#line 1151 "HTMLCharacterReference.gperf" + {"gscr;", "ℊ"}, +#line 595 "HTMLCharacterReference.gperf" + {"UpTee;", "⊥"}, +#line 1261 "HTMLCharacterReference.gperf" + {"jsercy;", "ј"}, + {""}, +#line 1116 "HTMLCharacterReference.gperf" + {"gap;", "⪆"}, +#line 1401 "HTMLCharacterReference.gperf" + {"lscr;", "𝓁"}, +#line 830 "HTMLCharacterReference.gperf" + {"bsime;", "⋍"}, +#line 991 "HTMLCharacterReference.gperf" + {"drcorn;", "⌟"}, + {""}, +#line 1287 "HTMLCharacterReference.gperf" + {"lap;", "⪅"}, +#line 1059 "HTMLCharacterReference.gperf" + {"escr;", "ℯ"}, + {""}, + {""}, + {""}, +#line 1299 "HTMLCharacterReference.gperf" + {"lat;", "⪫"}, +#line 905 "HTMLCharacterReference.gperf" + {"cscr;", "𝒸"}, + {""}, + {""}, + {""}, +#line 841 "HTMLCharacterReference.gperf" + {"cap;", "∩"}, +#line 714 "HTMLCharacterReference.gperf" + {"ascr;", "𝒶"}, + {""}, +#line 1305 "HTMLCharacterReference.gperf" + {"lbrace;", "{"}, + {""}, + {""}, +#line 1260 "HTMLCharacterReference.gperf" + {"jscr;", "𝒿"}, + {""}, + {""}, + {""}, + {""}, +#line 604 "HTMLCharacterReference.gperf" + {"Uscr;", "𝒰"}, +#line 1519 "HTMLCharacterReference.gperf" + {"ngsim;", "≵"}, + {""}, + {""}, + {""}, +#line 1165 "HTMLCharacterReference.gperf" + {"gtreqless;", "⋛"}, +#line 952 "HTMLCharacterReference.gperf" + {"ddarr;", "⇊"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1599 "HTMLCharacterReference.gperf" + {"nsupE;", "⫆̸"}, + {""}, + {""}, + {""}, +#line 993 "HTMLCharacterReference.gperf" + {"dscr;", "𝒹"}, + {""}, + {""}, + {""}, +#line 1520 "HTMLCharacterReference.gperf" + {"ngt;", "≯"}, +#line 1194 "HTMLCharacterReference.gperf" + {"hscr;", "𝒽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1617 "HTMLCharacterReference.gperf" + {"nvHarr;", "⤄"}, + {""}, +#line 31 "HTMLCharacterReference.gperf" + {"Afr;", "𝔄"}, +#line 1157 "HTMLCharacterReference.gperf" + {"gtcc;", "⪧"}, +#line 889 "HTMLCharacterReference.gperf" + {"comma;", ","}, + {""}, + {""}, + {""}, +#line 1412 "HTMLCharacterReference.gperf" + {"ltcc;", "⪦"}, + {""}, + {""}, + {""}, + {""}, +#line 1521 "HTMLCharacterReference.gperf" + {"ngtr;", "≯"}, + {""}, + {""}, + {""}, +#line 981 "HTMLCharacterReference.gperf" + {"doteqdot;", "≑"}, +#line 1290 "HTMLCharacterReference.gperf" + {"larr;", "←"}, +#line 797 "HTMLCharacterReference.gperf" + {"boxVl;", "╢"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 459 "HTMLCharacterReference.gperf" + {"Qscr;", "𝒬"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 37 "HTMLCharacterReference.gperf" 
+ {"Aogon;", "Ą"}, + {""}, +#line 658 "HTMLCharacterReference.gperf" + {"ac;", "∾"}, + {""}, +#line 1110 "HTMLCharacterReference.gperf" + {"fscr;", "𝒻"}, + {""}, + {""}, + {""}, + {""}, +#line 570 "HTMLCharacterReference.gperf" + {"Uarr;", "↟"}, + {""}, + {""}, + {""}, +#line 918 "HTMLCharacterReference.gperf" + {"cupbrcap;", "⩈"}, +#line 2200 "HTMLCharacterReference.gperf" + {"wscr;", "𝓌"}, +#line 1596 "HTMLCharacterReference.gperf" + {"nsucc;", "⊁"}, + {""}, + {""}, + {""}, + {""}, +#line 849 "HTMLCharacterReference.gperf" + {"caron;", "ˇ"}, + {""}, + {""}, + {""}, +#line 2184 "HTMLCharacterReference.gperf" + {"vscr;", "𝓋"}, + {""}, + {""}, + {""}, + {""}, +#line 943 "HTMLCharacterReference.gperf" + {"darr;", "↓"}, + {""}, +#line 1298 "HTMLCharacterReference.gperf" + {"larrtl;", "↢"}, + {""}, +#line 694 "HTMLCharacterReference.gperf" + {"angmsdag;", "⦮"}, +#line 1176 "HTMLCharacterReference.gperf" + {"harr;", "↔"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1586 "HTMLCharacterReference.gperf" + {"nsmid;", "∤"}, + {""}, + {""}, + {""}, + {""}, +#line 795 "HTMLCharacterReference.gperf" + {"boxVR;", "╠"}, + {""}, +#line 169 "HTMLCharacterReference.gperf" + {"Epsilon;", "Ε"}, + {""}, +#line 827 "HTMLCharacterReference.gperf" + {"bscr;", "𝒷"}, +#line 596 "HTMLCharacterReference.gperf" + {"UpTeeArrow;", "↥"}, + {""}, + {""}, + {""}, + {""}, +#line 170 "HTMLCharacterReference.gperf" + {"Equal;", "⩵"}, + {""}, +#line 199 "HTMLCharacterReference.gperf" + {"Gg;", "⋙"}, + {""}, + {""}, + {""}, +#line 1009 "HTMLCharacterReference.gperf" + {"easter;", "⩮"}, + {""}, + {""}, + {""}, +#line 1304 "HTMLCharacterReference.gperf" + {"lbbrk;", "❲"}, + {""}, + {""}, + {""}, +#line 609 "HTMLCharacterReference.gperf" + {"Vbar;", "⫫"}, +#line 2213 "HTMLCharacterReference.gperf" + {"xodot;", "⨀"}, +#line 1310 "HTMLCharacterReference.gperf" + {"lcaron;", "ľ"}, + {""}, + {""}, + {""}, + {""}, +#line 1010 "HTMLCharacterReference.gperf" + {"ecaron;", "ě"}, + {""}, + {""}, + {""}, + {""}, +#line 851 "HTMLCharacterReference.gperf" + {"ccaron;", "č"}, + {""}, + {""}, + {""}, + {""}, +#line 1014 "HTMLCharacterReference.gperf" + {"ecolon;", "≕"}, + {""}, + {""}, + {""}, + {""}, +#line 1419 "HTMLCharacterReference.gperf" + {"ltrPar;", "⦖"}, + {""}, + {""}, +#line 651 "HTMLCharacterReference.gperf" + {"Zeta;", "Ζ"}, + {""}, + {""}, + {""}, + {""}, +#line 2159 "HTMLCharacterReference.gperf" + {"varr;", "↕"}, + {""}, +#line 919 "HTMLCharacterReference.gperf" + {"cupcap;", "⩆"}, + {""}, + {""}, + {""}, +#line 1247 "HTMLCharacterReference.gperf" + {"isins;", "⋴"}, +#line 1296 "HTMLCharacterReference.gperf" + {"larrpl;", "⤹"}, + {""}, + {""}, +#line 218 "HTMLCharacterReference.gperf" + {"Hscr;", "ℋ"}, + {""}, +#line 2199 "HTMLCharacterReference.gperf" + {"wreath;", "≀"}, + {""}, + {""}, +#line 187 "HTMLCharacterReference.gperf" + {"Fscr;", "ℱ"}, + {""}, +#line 948 "HTMLCharacterReference.gperf" + {"dcaron;", "ď"}, + {""}, +#line 212 "HTMLCharacterReference.gperf" + {"Hat;", "^"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1243 "HTMLCharacterReference.gperf" + {"iscr;", "𝒾"}, + {""}, + {""}, + {""}, +#line 1395 "HTMLCharacterReference.gperf" + {"lrcorner;", "⌟"}, + {""}, + {""}, + {""}, + {""}, +#line 627 "HTMLCharacterReference.gperf" + {"Wfr;", "𝔚"}, + {""}, + {""}, + {""}, + {""}, +#line 36 "HTMLCharacterReference.gperf" + {"And;", "⩓"}, + {""}, + {""}, +#line 1295 "HTMLCharacterReference.gperf" + {"larrlp;", "↫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 863 "HTMLCharacterReference.gperf" + 
{"centerdot;", "·"}, + {""}, + {""}, + {""}, +#line 1514 "HTMLCharacterReference.gperf" + {"nge;", "≱"}, +#line 643 "HTMLCharacterReference.gperf" + {"Yscr;", "𝒴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 860 "HTMLCharacterReference.gperf" + {"cemptyv;", "⦲"}, + {""}, +#line 894 "HTMLCharacterReference.gperf" + {"complexes;", "ℂ"}, +#line 1376 "HTMLCharacterReference.gperf" + {"lobrk;", "⟦"}, +#line 189 "HTMLCharacterReference.gperf" + {"GT", ">"}, +#line 190 "HTMLCharacterReference.gperf" + {"GT;", ">"}, + {""}, +#line 1583 "HTMLCharacterReference.gperf" + {"nsim;", "≁"}, + {""}, +#line 1053 "HTMLCharacterReference.gperf" + {"equest;", "≟"}, + {""}, + {""}, +#line 1567 "HTMLCharacterReference.gperf" + {"npre;", "⪯̸"}, + {""}, +#line 890 "HTMLCharacterReference.gperf" + {"commat;", "@"}, + {""}, + {""}, +#line 322 "HTMLCharacterReference.gperf" + {"Lscr;", "ℒ"}, +#line 893 "HTMLCharacterReference.gperf" + {"complement;", "∁"}, + {""}, + {""}, +#line 2237 "HTMLCharacterReference.gperf" + {"yuml", "ÿ"}, +#line 2238 "HTMLCharacterReference.gperf" + {"yuml;", "ÿ"}, + {""}, + {""}, +#line 1201 "HTMLCharacterReference.gperf" + {"ic;", "⁣"}, + {""}, + {""}, +#line 1245 "HTMLCharacterReference.gperf" + {"isinE;", "⋹"}, + {""}, + {""}, +#line 555 "HTMLCharacterReference.gperf" + {"Tfr;", "𝔗"}, +#line 2251 "HTMLCharacterReference.gperf" + {"zwnj;", "‌"}, + {""}, + {""}, +#line 957 "HTMLCharacterReference.gperf" + {"demptyv;", "⦱"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1590 "HTMLCharacterReference.gperf" + {"nsub;", "⊄"}, + {""}, +#line 1510 "HTMLCharacterReference.gperf" + {"nexist;", "∄"}, +#line 1002 "HTMLCharacterReference.gperf" + {"dwangle;", "⦦"}, + {""}, +#line 1769 "HTMLCharacterReference.gperf" + {"qscr;", "𝓆"}, + {""}, + {""}, +#line 1461 "HTMLCharacterReference.gperf" + {"mp;", "∓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 438 "HTMLCharacterReference.gperf" + {"Pfr;", "𝔓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1117 "HTMLCharacterReference.gperf" + {"gbreve;", "ğ"}, + {""}, +#line 2245 "HTMLCharacterReference.gperf" + {"zfr;", "𝔷"}, +#line 994 "HTMLCharacterReference.gperf" + {"dscy;", "ѕ"}, +#line 1357 "HTMLCharacterReference.gperf" + {"lhblk;", "▄"}, + {""}, + {""}, + {""}, +#line 333 "HTMLCharacterReference.gperf" + {"Mscr;", "ℳ"}, + {""}, +#line 1614 "HTMLCharacterReference.gperf" + {"numero;", "№"}, + {""}, +#line 326 "HTMLCharacterReference.gperf" + {"Map;", "⤅"}, + {""}, + {""}, +#line 1492 "HTMLCharacterReference.gperf" + {"nbumpe;", "≏̸"}, + {""}, + {""}, + {""}, + {""}, +#line 657 "HTMLCharacterReference.gperf" + {"abreve;", "ă"}, +#line 1464 "HTMLCharacterReference.gperf" + {"mu;", "μ"}, + {""}, +#line 273 "HTMLCharacterReference.gperf" + {"Larr;", "↞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 573 "HTMLCharacterReference.gperf" + {"Ubreve;", "Ŭ"}, + {""}, + {""}, + {""}, +#line 1506 "HTMLCharacterReference.gperf" + {"nedot;", "≐̸"}, +#line 2216 "HTMLCharacterReference.gperf" + {"xotime;", "⨂"}, + {""}, + {""}, + {""}, + {""}, +#line 1231 "HTMLCharacterReference.gperf" + {"intcal;", "⊺"}, + {""}, + {""}, +#line 2158 "HTMLCharacterReference.gperf" + {"varpropto;", "∝"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 352 "HTMLCharacterReference.gperf" + {"NotCongruent;", "≢"}, + {""}, + {""}, +#line 1021 
"HTMLCharacterReference.gperf" + {"egrave", "è"}, +#line 1022 "HTMLCharacterReference.gperf" + {"egrave;", "è"}, + {""}, + {""}, +#line 865 "HTMLCharacterReference.gperf" + {"chcy;", "ч"}, +#line 850 "HTMLCharacterReference.gperf" + {"ccaps;", "⩍"}, +#line 1074 "HTMLCharacterReference.gperf" + {"female;", "♀"}, +#line 740 "HTMLCharacterReference.gperf" + {"bemptyv;", "⦰"}, + {""}, + {""}, +#line 670 "HTMLCharacterReference.gperf" + {"agrave", "à"}, +#line 671 "HTMLCharacterReference.gperf" + {"agrave;", "à"}, + {""}, + {""}, + {""}, + {""}, +#line 1174 "HTMLCharacterReference.gperf" + {"hamilt;", "ℋ"}, + {""}, + {""}, +#line 174 "HTMLCharacterReference.gperf" + {"Esim;", "⩳"}, +#line 579 "HTMLCharacterReference.gperf" + {"Ugrave", "Ù"}, +#line 580 "HTMLCharacterReference.gperf" + {"Ugrave;", "Ù"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 148 "HTMLCharacterReference.gperf" + {"Dscr;", "𝒟"}, +#line 2173 "HTMLCharacterReference.gperf" + {"veeeq;", "≚"}, +#line 710 "HTMLCharacterReference.gperf" + {"approx;", "≈"}, + {""}, +#line 621 "HTMLCharacterReference.gperf" + {"Vfr;", "𝔙"}, +#line 1474 "HTMLCharacterReference.gperf" + {"nLtv;", "≪̸"}, +#line 1407 "HTMLCharacterReference.gperf" + {"lsquo;", "‘"}, +#line 1408 "HTMLCharacterReference.gperf" + {"lsquor;", "‚"}, +#line 151 "HTMLCharacterReference.gperf" + {"ETH", "Ð"}, +#line 582 "HTMLCharacterReference.gperf" + {"UnderBar;", "_"}, + {""}, + {""}, + {""}, + {""}, +#line 152 "HTMLCharacterReference.gperf" + {"ETH;", "Ð"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 274 "HTMLCharacterReference.gperf" + {"Lcaron;", "Ľ"}, + {""}, + {""}, + {""}, + {""}, +#line 256 "HTMLCharacterReference.gperf" + {"Jsercy;", "Ј"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1515 "HTMLCharacterReference.gperf" + {"ngeq;", "≱"}, +#line 1516 "HTMLCharacterReference.gperf" + {"ngeqq;", "≧̸"}, + {""}, + {""}, + {""}, + {""}, +#line 1584 "HTMLCharacterReference.gperf" + {"nsime;", "≄"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 255 "HTMLCharacterReference.gperf" + {"Jscr;", "𝒥"}, + {""}, + {""}, + {""}, + {""}, +#line 230 "HTMLCharacterReference.gperf" + {"Idot;", "İ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1241 "HTMLCharacterReference.gperf" + {"iquest", "¿"}, +#line 1242 "HTMLCharacterReference.gperf" + {"iquest;", "¿"}, + {""}, + {""}, + {""}, +#line 1405 "HTMLCharacterReference.gperf" + {"lsimg;", "⪏"}, + {""}, + {""}, + {""}, +#line 633 "HTMLCharacterReference.gperf" + {"Xscr;", "𝒳"}, +#line 1312 "HTMLCharacterReference.gperf" + {"lceil;", "⌈"}, + {""}, + {""}, + {""}, +#line 102 "HTMLCharacterReference.gperf" + {"Darr;", "↡"}, +#line 1592 "HTMLCharacterReference.gperf" + {"nsube;", "⊈"}, +#line 523 "HTMLCharacterReference.gperf" + {"Square;", "□"}, + {""}, + {""}, +#line 712 "HTMLCharacterReference.gperf" + {"aring", "å"}, +#line 713 "HTMLCharacterReference.gperf" + {"aring;", "å"}, +#line 1047 "HTMLCharacterReference.gperf" + {"eqcirc;", "≖"}, + {""}, + {""}, +#line 663 "HTMLCharacterReference.gperf" + {"acute", "´"}, +#line 664 "HTMLCharacterReference.gperf" + {"acute;", "´"}, + {""}, + {""}, +#line 1444 "HTMLCharacterReference.gperf" + {"mho;", "℧"}, + {""}, +#line 603 "HTMLCharacterReference.gperf" + {"Uring;", "Ů"}, +#line 2186 "HTMLCharacterReference.gperf" + {"vsubne;", "⊊︀"}, + {""}, +#line 2232 "HTMLCharacterReference.gperf" + {"yfr;", "𝔶"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, +#line 1568 "HTMLCharacterReference.gperf" + {"nprec;", "⊀"}, + {""}, + {""}, + {""}, +#line 649 "HTMLCharacterReference.gperf" + {"Zdot;", "Ż"}, +#line 716 "HTMLCharacterReference.gperf" + {"asymp;", "≈"}, + {""}, + {""}, + {""}, +#line 530 "HTMLCharacterReference.gperf" + {"Sscr;", "𝒮"}, + {""}, + {""}, + {""}, + {""}, +#line 286 "HTMLCharacterReference.gperf" + {"LeftFloor;", "⌊"}, +#line 2180 "HTMLCharacterReference.gperf" + {"vnsup;", "⊃⃒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1048 "HTMLCharacterReference.gperf" + {"eqcolon;", "≕"}, + {""}, +#line 1236 "HTMLCharacterReference.gperf" + {"iocy;", "ё"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 842 "HTMLCharacterReference.gperf" + {"capand;", "⩄"}, + {""}, + {""}, + {""}, + {""}, +#line 1585 "HTMLCharacterReference.gperf" + {"nsimeq;", "≄"}, + {""}, +#line 542 "HTMLCharacterReference.gperf" + {"Superset;", "⊃"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1620 "HTMLCharacterReference.gperf" + {"nvge;", "≥⃒"}, +#line 1291 "HTMLCharacterReference.gperf" + {"larrb;", "⇤"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1463 "HTMLCharacterReference.gperf" + {"mstpos;", "∾"}, + {""}, +#line 1577 "HTMLCharacterReference.gperf" + {"nsc;", "⊁"}, +#line 246 "HTMLCharacterReference.gperf" + {"Iscr;", "ℐ"}, +#line 855 "HTMLCharacterReference.gperf" + {"ccups;", "⩌"}, +#line 104 "HTMLCharacterReference.gperf" + {"Dcaron;", "Ď"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1511 "HTMLCharacterReference.gperf" + {"nexists;", "∄"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 660 "HTMLCharacterReference.gperf" + {"acd;", "∿"}, + {""}, +#line 1210 "HTMLCharacterReference.gperf" + {"igrave", "ì"}, +#line 1211 "HTMLCharacterReference.gperf" + {"igrave;", "ì"}, + {""}, + {""}, +#line 1580 "HTMLCharacterReference.gperf" + {"nscr;", "𝓃"}, + {""}, + {""}, + {""}, +#line 1481 "HTMLCharacterReference.gperf" + {"nap;", "≉"}, + {""}, + {""}, + {""}, + {""}, +#line 691 "HTMLCharacterReference.gperf" + {"angmsdad;", "⦫"}, + {""}, + {""}, + {""}, +#line 509 "HTMLCharacterReference.gperf" + {"Sc;", "⪼"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 635 "HTMLCharacterReference.gperf" + {"YIcy;", "Ї"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1591 "HTMLCharacterReference.gperf" + {"nsubE;", "⫅̸"}, + {""}, + {""}, + {""}, +#line 159 "HTMLCharacterReference.gperf" + {"Edot;", "Ė"}, + {""}, + {""}, + {""}, + {""}, +#line 654 "HTMLCharacterReference.gperf" + {"Zscr;", "𝒵"}, + {""}, + {""}, + {""}, + {""}, +#line 636 "HTMLCharacterReference.gperf" + {"YUcy;", "Ю"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 278 "HTMLCharacterReference.gperf" + {"LeftArrow;", "←"}, +#line 815 "HTMLCharacterReference.gperf" + {"boxur;", "└"}, + {""}, + {""}, +#line 1443 "HTMLCharacterReference.gperf" + {"mfr;", "𝔪"}, +#line 197 "HTMLCharacterReference.gperf" + {"Gdot;", "Ġ"}, + {""}, +#line 43 "HTMLCharacterReference.gperf" + {"Assign;", "≔"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 279 "HTMLCharacterReference.gperf" + {"LeftArrowBar;", "⇤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 532 "HTMLCharacterReference.gperf" + {"Sub;", "⋐"}, +#line 615 "HTMLCharacterReference.gperf" + {"Vert;", "‖"}, + {""}, + {""}, + {""}, +#line 263 "HTMLCharacterReference.gperf" + {"Kfr;", "𝔎"}, + {""}, + {""}, + {""}, + {""}, +#line 688 "HTMLCharacterReference.gperf" + {"angmsdaa;", "⦨"}, + {""}, + {""}, + {""}, + {""}, +#line 94 
"HTMLCharacterReference.gperf" + {"Cup;", "⋓"}, + {""}, +#line 825 "HTMLCharacterReference.gperf" + {"brvbar", "¦"}, +#line 826 "HTMLCharacterReference.gperf" + {"brvbar;", "¦"}, + {""}, + {""}, + {""}, +#line 1050 "HTMLCharacterReference.gperf" + {"eqslantgtr;", "⪖"}, +#line 510 "HTMLCharacterReference.gperf" + {"Scaron;", "Š"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 783 "HTMLCharacterReference.gperf" + {"boxH;", "═"}, + {""}, + {""}, +#line 500 "HTMLCharacterReference.gperf" + {"RoundImplies;", "⥰"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 502 "HTMLCharacterReference.gperf" + {"Rscr;", "ℛ"}, + {""}, + {""}, + {""}, +#line 711 "HTMLCharacterReference.gperf" + {"approxeq;", "≊"}, + {""}, +#line 1466 "HTMLCharacterReference.gperf" + {"mumap;", "⊸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 173 "HTMLCharacterReference.gperf" + {"Escr;", "ℰ"}, + {""}, + {""}, + {""}, +#line 1142 "HTMLCharacterReference.gperf" + {"gnE;", "≩"}, + {""}, +#line 814 "HTMLCharacterReference.gperf" + {"boxul;", "┘"}, + {""}, + {""}, +#line 1367 "HTMLCharacterReference.gperf" + {"lnE;", "≨"}, +#line 2219 "HTMLCharacterReference.gperf" + {"xscr;", "𝓍"}, + {""}, + {""}, + {""}, + {""}, +#line 1517 "HTMLCharacterReference.gperf" + {"ngeqslant;", "⩾̸"}, + {""}, + {""}, + {""}, +#line 705 "HTMLCharacterReference.gperf" + {"apE;", "⩰"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 208 "HTMLCharacterReference.gperf" + {"Gscr;", "𝒢"}, + {""}, +#line 1494 "HTMLCharacterReference.gperf" + {"ncaron;", "ň"}, + {""}, + {""}, +#line 1493 "HTMLCharacterReference.gperf" + {"ncap;", "⩃"}, + {""}, + {""}, + {""}, +#line 150 "HTMLCharacterReference.gperf" + {"ENG;", "Ŋ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2223 "HTMLCharacterReference.gperf" + {"xvee;", "⋁"}, + {""}, + {""}, +#line 375 "HTMLCharacterReference.gperf" + {"NotLessSlantEqual;", "⩽̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2244 "HTMLCharacterReference.gperf" + {"zeta;", "ζ"}, + {""}, + {""}, +#line 371 "HTMLCharacterReference.gperf" + {"NotLess;", "≮"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 465 "HTMLCharacterReference.gperf" + {"Rarr;", "↠"}, + {""}, +#line 647 "HTMLCharacterReference.gperf" + {"Zcaron;", "Ž"}, + {""}, + {""}, + {""}, +#line 813 "HTMLCharacterReference.gperf" + {"boxuR;", "╘"}, + {""}, +#line 2230 "HTMLCharacterReference.gperf" + {"yen", "¥"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2231 "HTMLCharacterReference.gperf" + {"yen;", "¥"}, + {""}, + {""}, +#line 1193 "HTMLCharacterReference.gperf" + {"horbar;", "―"}, + {""}, +#line 690 "HTMLCharacterReference.gperf" + {"angmsdac;", "⦪"}, +#line 556 "HTMLCharacterReference.gperf" + {"Therefore;", "∴"}, + {""}, + {""}, + {""}, + {""}, +#line 1301 "HTMLCharacterReference.gperf" + {"late;", "⪭"}, +#line 1484 "HTMLCharacterReference.gperf" + {"napos;", "ʼn"}, + {""}, + {""}, +#line 613 "HTMLCharacterReference.gperf" + {"Vee;", "⋁"}, +#line 99 "HTMLCharacterReference.gperf" + {"DScy;", "Ѕ"}, + {""}, + {""}, + {""}, +#line 1190 "HTMLCharacterReference.gperf" + {"hookleftarrow;", "↩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 466 "HTMLCharacterReference.gperf" + {"Rarrtl;", "⤖"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1381 
"HTMLCharacterReference.gperf" + {"looparrowleft;", "↫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 467 "HTMLCharacterReference.gperf" + {"Rcaron;", "Ř"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 403 "HTMLCharacterReference.gperf" + {"Nscr;", "𝒩"}, + {""}, + {""}, +#line 1161 "HTMLCharacterReference.gperf" + {"gtquest;", "⩼"}, +#line 454 "HTMLCharacterReference.gperf" + {"Psi;", "Ψ"}, + {""}, + {""}, +#line 155 "HTMLCharacterReference.gperf" + {"Ecaron;", "Ě"}, +#line 1418 "HTMLCharacterReference.gperf" + {"ltquest;", "⩻"}, + {""}, + {""}, +#line 848 "HTMLCharacterReference.gperf" + {"caret;", "⁁"}, + {""}, + {""}, + {""}, +#line 1205 "HTMLCharacterReference.gperf" + {"iecy;", "е"}, + {""}, + {""}, + {""}, + {""}, +#line 2201 "HTMLCharacterReference.gperf" + {"xcap;", "⋂"}, +#line 718 "HTMLCharacterReference.gperf" + {"atilde", "ã"}, +#line 719 "HTMLCharacterReference.gperf" + {"atilde;", "ã"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1601 "HTMLCharacterReference.gperf" + {"nsupset;", "⊃⃒"}, +#line 55 "HTMLCharacterReference.gperf" + {"Bfr;", "𝔅"}, +#line 221 "HTMLCharacterReference.gperf" + {"HumpEqual;", "≏"}, + {""}, +#line 605 "HTMLCharacterReference.gperf" + {"Utilde;", "Ũ"}, + {""}, + {""}, +#line 1011 "HTMLCharacterReference.gperf" + {"ecir;", "≖"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 75 "HTMLCharacterReference.gperf" + {"Cfr;", "ℭ"}, + {""}, + {""}, + {""}, +#line 856 "HTMLCharacterReference.gperf" + {"ccupssm;", "⩐"}, + {""}, +#line 1313 "HTMLCharacterReference.gperf" + {"lcub;", "{"}, +#line 2179 "HTMLCharacterReference.gperf" + {"vnsub;", "⊂⃒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1063 "HTMLCharacterReference.gperf" + {"eth", "ð"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1064 "HTMLCharacterReference.gperf" + {"eth;", "ð"}, +#line 1498 "HTMLCharacterReference.gperf" + {"ncup;", "⩂"}, + {""}, + {""}, +#line 1138 "HTMLCharacterReference.gperf" + {"gl;", "≷"}, + {""}, +#line 42 "HTMLCharacterReference.gperf" + {"Ascr;", "𝒜"}, + {""}, + {""}, +#line 1359 "HTMLCharacterReference.gperf" + {"ll;", "≪"}, + {""}, +#line 100 "HTMLCharacterReference.gperf" + {"DZcy;", "Џ"}, +#line 232 "HTMLCharacterReference.gperf" + {"Igrave", "Ì"}, +#line 233 "HTMLCharacterReference.gperf" + {"Igrave;", "Ì"}, +#line 1025 "HTMLCharacterReference.gperf" + {"el;", "⪙"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1028 "HTMLCharacterReference.gperf" + {"els;", "⪕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2243 "HTMLCharacterReference.gperf" + {"zeetrf;", "ℨ"}, + {""}, +#line 439 "HTMLCharacterReference.gperf" + {"Phi;", "Φ"}, +#line 1382 "HTMLCharacterReference.gperf" + {"looparrowright;", "↬"}, +#line 359 "HTMLCharacterReference.gperf" + {"NotGreater;", "≯"}, +#line 59 "HTMLCharacterReference.gperf" + {"Bumpeq;", "≎"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1140 "HTMLCharacterReference.gperf" + {"gla;", "⪥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 449 "HTMLCharacterReference.gperf" + {"Prime;", "″"}, + {""}, +#line 1246 "HTMLCharacterReference.gperf" + {"isindot;", "⋵"}, + {""}, +#line 1137 "HTMLCharacterReference.gperf" + {"gjcy;", "ѓ"}, + {""}, + {""}, + {""}, + {""}, +#line 1358 "HTMLCharacterReference.gperf" + {"ljcy;", "љ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 969 "HTMLCharacterReference.gperf" + {"div;", "÷"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, +#line 962 "HTMLCharacterReference.gperf" + {"diam;", "⋄"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1597 "HTMLCharacterReference.gperf" + {"nsucceq;", "⪰̸"}, + {""}, + {""}, +#line 1302 "HTMLCharacterReference.gperf" + {"lates;", "⪭︀"}, + {""}, +#line 1292 "HTMLCharacterReference.gperf" + {"larrbfs;", "⤟"}, + {""}, +#line 516 "HTMLCharacterReference.gperf" + {"ShortLeftArrow;", "←"}, +#line 779 "HTMLCharacterReference.gperf" + {"boxDL;", "╗"}, +#line 2194 "HTMLCharacterReference.gperf" + {"weierp;", "℘"}, +#line 1485 "HTMLCharacterReference.gperf" + {"napprox;", "≉"}, +#line 1402 "HTMLCharacterReference.gperf" + {"lsh;", "↰"}, +#line 506 "HTMLCharacterReference.gperf" + {"SHcy;", "Ш"}, + {""}, +#line 337 "HTMLCharacterReference.gperf" + {"Ncaron;", "Ň"}, +#line 206 "HTMLCharacterReference.gperf" + {"GreaterSlantEqual;", "⩾"}, + {""}, +#line 974 "HTMLCharacterReference.gperf" + {"djcy;", "ђ"}, +#line 1399 "HTMLCharacterReference.gperf" + {"lrtri;", "⊿"}, +#line 1226 "HTMLCharacterReference.gperf" + {"incare;", "℅"}, + {""}, +#line 869 "HTMLCharacterReference.gperf" + {"cir;", "○"}, + {""}, +#line 968 "HTMLCharacterReference.gperf" + {"disin;", "⋲"}, + {""}, + {""}, + {""}, + {""}, +#line 1422 "HTMLCharacterReference.gperf" + {"ltrif;", "◂"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1172 "HTMLCharacterReference.gperf" + {"hairsp;", " "}, + {""}, + {""}, + {""}, +#line 1360 "HTMLCharacterReference.gperf" + {"llarr;", "⇇"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 953 "HTMLCharacterReference.gperf" + {"ddotseq;", "⩷"}, + {""}, +#line 1081 "HTMLCharacterReference.gperf" + {"flat;", "♭"}, + {""}, + {""}, + {""}, + {""}, +#line 2203 "HTMLCharacterReference.gperf" + {"xcup;", "⋃"}, + {""}, +#line 193 "HTMLCharacterReference.gperf" + {"Gbreve;", "Ğ"}, + {""}, + {""}, + {""}, +#line 1114 "HTMLCharacterReference.gperf" + {"gamma;", "γ"}, + {""}, + {""}, + {""}, + {""}, +#line 999 "HTMLCharacterReference.gperf" + {"dtrif;", "▾"}, +#line 1251 "HTMLCharacterReference.gperf" + {"itilde;", "ĩ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 161 "HTMLCharacterReference.gperf" + {"Egrave", "È"}, +#line 162 "HTMLCharacterReference.gperf" + {"Egrave;", "È"}, + {""}, + {""}, +#line 629 "HTMLCharacterReference.gperf" + {"Wscr;", "𝒲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2242 "HTMLCharacterReference.gperf" + {"zdot;", "ż"}, + {""}, + {""}, + {""}, + {""}, +#line 1602 "HTMLCharacterReference.gperf" + {"nsupseteq;", "⊉"}, +#line 1603 "HTMLCharacterReference.gperf" + {"nsupseteqq;", "⫆̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 645 "HTMLCharacterReference.gperf" + {"ZHcy;", "Ж"}, +#line 735 "HTMLCharacterReference.gperf" + {"bcong;", "≌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1212 "HTMLCharacterReference.gperf" + {"ii;", "ⅈ"}, + {""}, + {""}, + {""}, +#line 87 "HTMLCharacterReference.gperf" + {"Conint;", "∯"}, + {""}, +#line 455 "HTMLCharacterReference.gperf" + {"QUOT", "\""}, +#line 456 "HTMLCharacterReference.gperf" + {"QUOT;", "\""}, +#line 1118 "HTMLCharacterReference.gperf" + {"gcirc;", "ĝ"}, + {""}, + {""}, + {""}, + {""}, +#line 2183 "HTMLCharacterReference.gperf" + {"vrtri;", "⊳"}, + {""}, + {""}, + {""}, +#line 1012 "HTMLCharacterReference.gperf" + {"ecirc", "ê"}, +#line 1013 "HTMLCharacterReference.gperf" + {"ecirc;", "ê"}, + {""}, + {""}, 
+ {""}, + {""}, +#line 854 "HTMLCharacterReference.gperf" + {"ccirc;", "ĉ"}, + {""}, + {""}, + {""}, +#line 661 "HTMLCharacterReference.gperf" + {"acirc", "â"}, +#line 662 "HTMLCharacterReference.gperf" + {"acirc;", "â"}, +#line 598 "HTMLCharacterReference.gperf" + {"Updownarrow;", "⇕"}, + {""}, + {""}, + {""}, +#line 1255 "HTMLCharacterReference.gperf" + {"jcirc;", "ĵ"}, +#line 892 "HTMLCharacterReference.gperf" + {"compfn;", "∘"}, + {""}, + {""}, +#line 574 "HTMLCharacterReference.gperf" + {"Ucirc", "Û"}, +#line 575 "HTMLCharacterReference.gperf" + {"Ucirc;", "Û"}, + {""}, + {""}, + {""}, +#line 566 "HTMLCharacterReference.gperf" + {"Tscr;", "𝒯"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 788 "HTMLCharacterReference.gperf" + {"boxUL;", "╝"}, +#line 374 "HTMLCharacterReference.gperf" + {"NotLessLess;", "≪̸"}, + {""}, +#line 445 "HTMLCharacterReference.gperf" + {"Precedes;", "≺"}, +#line 599 "HTMLCharacterReference.gperf" + {"UpperLeftArrow;", "↖"}, + {""}, +#line 706 "HTMLCharacterReference.gperf" + {"apacir;", "⩯"}, + {""}, + {""}, +#line 709 "HTMLCharacterReference.gperf" + {"apos;", "'"}, +#line 1180 "HTMLCharacterReference.gperf" + {"hcirc;", "ĥ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1228 "HTMLCharacterReference.gperf" + {"infintie;", "⧝"}, +#line 453 "HTMLCharacterReference.gperf" + {"Pscr;", "𝒫"}, + {""}, +#line 2175 "HTMLCharacterReference.gperf" + {"verbar;", "|"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2249 "HTMLCharacterReference.gperf" + {"zscr;", "𝓏"}, + {""}, +#line 585 "HTMLCharacterReference.gperf" + {"UnderParenthesis;", "⏝"}, + {""}, +#line 966 "HTMLCharacterReference.gperf" + {"die;", "¨"}, +#line 871 "HTMLCharacterReference.gperf" + {"circ;", "ˆ"}, + {""}, + {""}, +#line 310 "HTMLCharacterReference.gperf" + {"Ll;", "⋘"}, + {""}, + {""}, +#line 965 "HTMLCharacterReference.gperf" + {"diams;", "♦"}, +#line 799 "HTMLCharacterReference.gperf" + {"boxbox;", "⧉"}, + {""}, + {""}, +#line 1315 "HTMLCharacterReference.gperf" + {"ldca;", "⤶"}, +#line 767 "HTMLCharacterReference.gperf" + {"blank;", "␣"}, + {""}, + {""}, + {""}, + {""}, +#line 1396 "HTMLCharacterReference.gperf" + {"lrhar;", "⇋"}, + {""}, + {""}, + {""}, +#line 895 "HTMLCharacterReference.gperf" + {"cong;", "≅"}, +#line 1483 "HTMLCharacterReference.gperf" + {"napid;", "≋̸"}, + {""}, + {""}, + {""}, +#line 1322 "HTMLCharacterReference.gperf" + {"leftarrow;", "←"}, + {""}, +#line 1164 "HTMLCharacterReference.gperf" + {"gtrdot;", "⋗"}, + {""}, + {""}, + {""}, +#line 2190 "HTMLCharacterReference.gperf" + {"wcirc;", "ŵ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 923 "HTMLCharacterReference.gperf" + {"cups;", "∪︀"}, +#line 803 "HTMLCharacterReference.gperf" + {"boxdr;", "┌"}, +#line 921 "HTMLCharacterReference.gperf" + {"cupdot;", "⊍"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 27 "HTMLCharacterReference.gperf" + {"Abreve;", "Ă"}, + {""}, + {""}, +#line 280 "HTMLCharacterReference.gperf" + {"LeftArrowRightArrow;", "⇆"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1282 "HTMLCharacterReference.gperf" + {"lagran;", "ℒ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 990 "HTMLCharacterReference.gperf" + {"drbkarow;", "⤐"}, + {""}, + {""}, + {""}, +#line 525 "HTMLCharacterReference.gperf" + {"SquareSubset;", "⊏"}, + {""}, +#line 623 "HTMLCharacterReference.gperf" + 
{"Vscr;", "𝒱"}, +#line 771 "HTMLCharacterReference.gperf" + {"block;", "█"}, + {""}, + {""}, +#line 323 "HTMLCharacterReference.gperf" + {"Lsh;", "↰"}, + {""}, + {""}, +#line 185 "HTMLCharacterReference.gperf" + {"ForAll;", "∀"}, + {""}, + {""}, + {""}, + {""}, +#line 844 "HTMLCharacterReference.gperf" + {"capcap;", "⩋"}, + {""}, + {""}, + {""}, +#line 32 "HTMLCharacterReference.gperf" + {"Agrave", "À"}, +#line 33 "HTMLCharacterReference.gperf" + {"Agrave;", "À"}, + {""}, + {""}, + {""}, +#line 1001 "HTMLCharacterReference.gperf" + {"duhar;", "⥯"}, +#line 1619 "HTMLCharacterReference.gperf" + {"nvdash;", "⊬"}, + {""}, + {""}, + {""}, +#line 786 "HTMLCharacterReference.gperf" + {"boxHd;", "╤"}, +#line 1115 "HTMLCharacterReference.gperf" + {"gammad;", "ϝ"}, + {""}, +#line 377 "HTMLCharacterReference.gperf" + {"NotNestedGreaterGreater;", "⪢̸"}, + {""}, + {""}, +#line 552 "HTMLCharacterReference.gperf" + {"Tcaron;", "Ť"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1353 "HTMLCharacterReference.gperf" + {"lgE;", "⪑"}, +#line 54 "HTMLCharacterReference.gperf" + {"Beta;", "Β"}, +#line 213 "HTMLCharacterReference.gperf" + {"Hcirc;", "Ĥ"}, + {""}, +#line 378 "HTMLCharacterReference.gperf" + {"NotNestedLessLess;", "⪡̸"}, +#line 551 "HTMLCharacterReference.gperf" + {"Tau;", "Τ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 802 "HTMLCharacterReference.gperf" + {"boxdl;", "┐"}, +#line 2172 "HTMLCharacterReference.gperf" + {"veebar;", "⊻"}, +#line 526 "HTMLCharacterReference.gperf" + {"SquareSubsetEqual;", "⊑"}, + {""}, +#line 1202 "HTMLCharacterReference.gperf" + {"icirc", "î"}, +#line 1203 "HTMLCharacterReference.gperf" + {"icirc;", "î"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 529 "HTMLCharacterReference.gperf" + {"SquareUnion;", "⊔"}, + {""}, +#line 1144 "HTMLCharacterReference.gperf" + {"gnapprox;", "⪊"}, +#line 1579 "HTMLCharacterReference.gperf" + {"nsce;", "⪰̸"}, + {""}, + {""}, + {""}, +#line 1369 "HTMLCharacterReference.gperf" + {"lnapprox;", "⪉"}, + {""}, + {""}, +#line 2240 "HTMLCharacterReference.gperf" + {"zcaron;", "ž"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1766 "HTMLCharacterReference.gperf" + {"qint;", "⨌"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 639 "HTMLCharacterReference.gperf" + {"Ycirc;", "Ŷ"}, + {""}, + {""}, + {""}, +#line 2235 "HTMLCharacterReference.gperf" + {"yscr;", "𝓎"}, +#line 1355 "HTMLCharacterReference.gperf" + {"lharu;", "↼"}, + {""}, + {""}, + {""}, + {""}, +#line 700 "HTMLCharacterReference.gperf" + {"angst;", "Å"}, +#line 1229 "HTMLCharacterReference.gperf" + {"inodot;", "ı"}, + {""}, + {""}, +#line 527 "HTMLCharacterReference.gperf" + {"SquareSuperset;", "⊐"}, +#line 904 "HTMLCharacterReference.gperf" + {"cross;", "✗"}, + {""}, + {""}, + {""}, +#line 528 "HTMLCharacterReference.gperf" + {"SquareSupersetEqual;", "⊒"}, +#line 1083 "HTMLCharacterReference.gperf" + {"fltns;", "▱"}, + {""}, +#line 631 "HTMLCharacterReference.gperf" + {"Xi;", "Ξ"}, +#line 571 "HTMLCharacterReference.gperf" + {"Uarrocir;", "⥉"}, + {""}, +#line 801 "HTMLCharacterReference.gperf" + {"boxdR;", "╒"}, +#line 1356 "HTMLCharacterReference.gperf" + {"lharul;", "⥪"}, + {""}, +#line 843 "HTMLCharacterReference.gperf" + {"capbrcup;", "⩉"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 40 "HTMLCharacterReference.gperf" + {"Aring", "Å"}, +#line 41 "HTMLCharacterReference.gperf" + {"Aring;", "Å"}, + {""}, + {""}, + {""}, + {""}, +#line 1108 "HTMLCharacterReference.gperf" + {"frasl;", "⁄"}, + {""}, + {""}, + {""}, + {""}, +#line 1316 
"HTMLCharacterReference.gperf" + {"ldquo;", "“"}, +#line 1317 "HTMLCharacterReference.gperf" + {"ldquor;", "„"}, +#line 1569 "HTMLCharacterReference.gperf" + {"npreceq;", "⪯̸"}, + {""}, +#line 1341 "HTMLCharacterReference.gperf" + {"lesg;", "⋚︀"}, +#line 828 "HTMLCharacterReference.gperf" + {"bsemi;", "⁏"}, +#line 1634 "HTMLCharacterReference.gperf" + {"nwnear;", "⤧"}, +#line 717 "HTMLCharacterReference.gperf" + {"asympeq;", "≍"}, + {""}, + {""}, +#line 818 "HTMLCharacterReference.gperf" + {"boxvL;", "╡"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1385 "HTMLCharacterReference.gperf" + {"loplus;", "⨭"}, + {""}, + {""}, +#line 681 "HTMLCharacterReference.gperf" + {"andd;", "⩜"}, + {""}, +#line 247 "HTMLCharacterReference.gperf" + {"Itilde;", "Ĩ"}, + {""}, + {""}, + {""}, + {""}, +#line 205 "HTMLCharacterReference.gperf" + {"GreaterLess;", "≷"}, + {""}, + {""}, +#line 984 "HTMLCharacterReference.gperf" + {"dotsquare;", "⊡"}, + {""}, +#line 920 "HTMLCharacterReference.gperf" + {"cupcup;", "⩊"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1605 "HTMLCharacterReference.gperf" + {"ntilde", "ñ"}, +#line 1606 "HTMLCharacterReference.gperf" + {"ntilde;", "ñ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1593 "HTMLCharacterReference.gperf" + {"nsubset;", "⊂⃒"}, + {""}, + {""}, + {""}, +#line 1397 "HTMLCharacterReference.gperf" + {"lrhard;", "⥭"}, + {""}, +#line 1232 "HTMLCharacterReference.gperf" + {"integers;", "ℤ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 557 "HTMLCharacterReference.gperf" + {"Theta;", "Θ"}, + {""}, + {""}, +#line 1267 "HTMLCharacterReference.gperf" + {"kfr;", "𝔨"}, + {""}, + {""}, + {""}, +#line 388 "HTMLCharacterReference.gperf" + {"NotSquareSuperset;", "⊐̸"}, + {""}, + {""}, + {""}, + {""}, +#line 389 "HTMLCharacterReference.gperf" + {"NotSquareSupersetEqual;", "⋣"}, + {""}, +#line 1288 "HTMLCharacterReference.gperf" + {"laquo", "«"}, +#line 1289 "HTMLCharacterReference.gperf" + {"laquo;", "«"}, +#line 872 "HTMLCharacterReference.gperf" + {"circeq;", "≗"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1525 "HTMLCharacterReference.gperf" + {"ni;", "∋"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1526 "HTMLCharacterReference.gperf" + {"nis;", "⋼"}, +#line 301 "HTMLCharacterReference.gperf" + {"Leftarrow;", "⇐"}, + {""}, + {""}, + {""}, +#line 982 "HTMLCharacterReference.gperf" + {"dotminus;", "∸"}, +#line 1462 "HTMLCharacterReference.gperf" + {"mscr;", "𝓂"}, + {""}, + {""}, + {""}, +#line 1433 "HTMLCharacterReference.gperf" + {"map;", "↦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1326 "HTMLCharacterReference.gperf" + {"leftleftarrows;", "⇇"}, + {""}, +#line 1400 "HTMLCharacterReference.gperf" + {"lsaquo;", "‹"}, + {""}, +#line 1528 "HTMLCharacterReference.gperf" + {"niv;", "∋"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 265 "HTMLCharacterReference.gperf" + {"Kscr;", "𝒦"}, + {""}, + {""}, + {""}, + {""}, +#line 363 "HTMLCharacterReference.gperf" + {"NotGreaterLess;", "≹"}, + {""}, +#line 120 "HTMLCharacterReference.gperf" + {"DoubleContourIntegral;", "∯"}, + {""}, +#line 76 "HTMLCharacterReference.gperf" + {"Chi;", "Χ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 515 "HTMLCharacterReference.gperf" + {"ShortDownArrow;", "↓"}, +#line 737 "HTMLCharacterReference.gperf" + {"bdquo;", "„"}, + {""}, + {""}, +#line 809 "HTMLCharacterReference.gperf" + {"boxminus;", "⊟"}, +#line 549 "HTMLCharacterReference.gperf" + {"TScy;", "Ц"}, +#line 1542 
"HTMLCharacterReference.gperf" + {"nlsim;", "≴"}, + {""}, +#line 132 "HTMLCharacterReference.gperf" + {"DoubleUpDownArrow;", "⇕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1529 "HTMLCharacterReference.gperf" + {"njcy;", "њ"}, +#line 1496 "HTMLCharacterReference.gperf" + {"ncong;", "≇"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2191 "HTMLCharacterReference.gperf" + {"wedbar;", "⩟"}, + {""}, + {""}, +#line 2246 "HTMLCharacterReference.gperf" + {"zhcy;", "ж"}, +#line 57 "HTMLCharacterReference.gperf" + {"Breve;", "˘"}, + {""}, +#line 1111 "HTMLCharacterReference.gperf" + {"gE;", "≧"}, +#line 1543 "HTMLCharacterReference.gperf" + {"nlt;", "≮"}, +#line 2040 "HTMLCharacterReference.gperf" + {"tbrk;", "⎴"}, + {""}, +#line 730 "HTMLCharacterReference.gperf" + {"barvee;", "⊽"}, +#line 1277 "HTMLCharacterReference.gperf" + {"lE;", "≦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1119 "HTMLCharacterReference.gperf" + {"gcy;", "г"}, + {""}, +#line 251 "HTMLCharacterReference.gperf" + {"Jcirc;", "Ĵ"}, + {""}, + {""}, +#line 1314 "HTMLCharacterReference.gperf" + {"lcy;", "л"}, + {""}, + {""}, + {""}, + {""}, +#line 1015 "HTMLCharacterReference.gperf" + {"ecy;", "э"}, + {""}, + {""}, + {""}, +#line 597 "HTMLCharacterReference.gperf" + {"Uparrow;", "⇑"}, + {""}, +#line 932 "HTMLCharacterReference.gperf" + {"curvearrowleft;", "↶"}, +#line 1575 "HTMLCharacterReference.gperf" + {"nrtri;", "⋫"}, + {""}, + {""}, +#line 665 "HTMLCharacterReference.gperf" + {"acy;", "а"}, + {""}, + {""}, + {""}, + {""}, +#line 1256 "HTMLCharacterReference.gperf" + {"jcy;", "й"}, +#line 2236 "HTMLCharacterReference.gperf" + {"yucy;", "ю"}, + {""}, +#line 1128 "HTMLCharacterReference.gperf" + {"gesdot;", "⪀"}, + {""}, +#line 576 "HTMLCharacterReference.gperf" + {"Ucy;", "У"}, + {""}, +#line 1057 "HTMLCharacterReference.gperf" + {"erDot;", "≓"}, +#line 1338 "HTMLCharacterReference.gperf" + {"lesdot;", "⩿"}, + {""}, + {""}, + {""}, +#line 1532 "HTMLCharacterReference.gperf" + {"nlarr;", "↚"}, + {""}, + {""}, +#line 2067 "HTMLCharacterReference.gperf" + {"top;", "⊤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1178 "HTMLCharacterReference.gperf" + {"harrw;", "↭"}, + {""}, +#line 2208 "HTMLCharacterReference.gperf" + {"xi;", "ξ"}, +#line 949 "HTMLCharacterReference.gperf" + {"dcy;", "д"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 583 "HTMLCharacterReference.gperf" + {"UnderBrace;", "⏟"}, +#line 1627 "HTMLCharacterReference.gperf" + {"nvrArr;", "⤃"}, +#line 584 "HTMLCharacterReference.gperf" + {"UnderBracket;", "⎵"}, +#line 1340 "HTMLCharacterReference.gperf" + {"lesdotor;", "⪃"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 512 "HTMLCharacterReference.gperf" + {"Scirc;", "Ŝ"}, +#line 1342 "HTMLCharacterReference.gperf" + {"lesges;", "⪓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 751 "HTMLCharacterReference.gperf" + {"bigoplus;", "⨁"}, + {""}, + {""}, +#line 1477 "HTMLCharacterReference.gperf" + {"nVdash;", "⊮"}, +#line 1771 "HTMLCharacterReference.gperf" + {"quatint;", "⨖"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1594 "HTMLCharacterReference.gperf" + {"nsubseteq;", "⊈"}, +#line 1595 "HTMLCharacterReference.gperf" + {"nsubseteqq;", "⫅̸"}, + {""}, + {""}, + {""}, +#line 72 "HTMLCharacterReference.gperf" + {"Cdot;", "Ċ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 914 "HTMLCharacterReference.gperf" + {"cuesc;", "⋟"}, + {""}, + {""}, +#line 1073 "HTMLCharacterReference.gperf" + {"fcy;", "ф"}, + {""}, + {""}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, +#line 762 "HTMLCharacterReference.gperf" + {"blacksquare;", "▪"}, + {""}, + {""}, +#line 227 "HTMLCharacterReference.gperf" + {"Icirc", "Î"}, +#line 228 "HTMLCharacterReference.gperf" + {"Icirc;", "Î"}, + {""}, + {""}, +#line 811 "HTMLCharacterReference.gperf" + {"boxtimes;", "⊠"}, + {""}, +#line 794 "HTMLCharacterReference.gperf" + {"boxVL;", "╣"}, + {""}, + {""}, +#line 2169 "HTMLCharacterReference.gperf" + {"vcy;", "в"}, +#line 2057 "HTMLCharacterReference.gperf" + {"thorn", "þ"}, +#line 2058 "HTMLCharacterReference.gperf" + {"thorn;", "þ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 503 "HTMLCharacterReference.gperf" + {"Rsh;", "↱"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 376 "HTMLCharacterReference.gperf" + {"NotLessTilde;", "≴"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 736 "HTMLCharacterReference.gperf" + {"bcy;", "б"}, + {""}, +#line 404 "HTMLCharacterReference.gperf" + {"Ntilde", "Ñ"}, +#line 405 "HTMLCharacterReference.gperf" + {"Ntilde;", "Ñ"}, + {""}, + {""}, +#line 708 "HTMLCharacterReference.gperf" + {"apid;", "≋"}, + {""}, +#line 1508 "HTMLCharacterReference.gperf" + {"nesear;", "⤨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1003 "HTMLCharacterReference.gperf" + {"dzcy;", "џ"}, + {""}, +#line 1415 "HTMLCharacterReference.gperf" + {"lthree;", "⋋"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 23 "HTMLCharacterReference.gperf" + {"AMP", "&"}, + {""}, + {""}, + {""}, +#line 1438 "HTMLCharacterReference.gperf" + {"marker;", "▮"}, + {""}, +#line 24 "HTMLCharacterReference.gperf" + {"AMP;", "&"}, + {""}, +#line 1772 "HTMLCharacterReference.gperf" + {"quest;", "?"}, + {""}, + {""}, +#line 1534 "HTMLCharacterReference.gperf" + {"nle;", "≰"}, +#line 58 "HTMLCharacterReference.gperf" + {"Bscr;", "ℬ"}, +#line 2210 "HTMLCharacterReference.gperf" + {"xlarr;", "⟵"}, +#line 1024 "HTMLCharacterReference.gperf" + {"egsdot;", "⪘"}, +#line 289 "HTMLCharacterReference.gperf" + {"LeftTee;", "⊣"}, +#line 1278 "HTMLCharacterReference.gperf" + {"lEg;", "⪋"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 963 "HTMLCharacterReference.gperf" + {"diamond;", "⋄"}, +#line 2046 "HTMLCharacterReference.gperf" + {"tfr;", "𝔱"}, +#line 93 "HTMLCharacterReference.gperf" + {"Cscr;", "𝒞"}, + {""}, + {""}, + {""}, +#line 64 "HTMLCharacterReference.gperf" + {"Cap;", "⋒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2222 "HTMLCharacterReference.gperf" + {"xutri;", "△"}, + {""}, + {""}, +#line 180 "HTMLCharacterReference.gperf" + {"Fcy;", "Ф"}, +#line 1182 "HTMLCharacterReference.gperf" + {"heartsuit;", "♥"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 44 "HTMLCharacterReference.gperf" + {"Atilde", "Ã"}, +#line 45 "HTMLCharacterReference.gperf" + {"Atilde;", "Ã"}, + {""}, +#line 1204 "HTMLCharacterReference.gperf" + {"icy;", "и"}, + {""}, + {""}, +#line 1197 "HTMLCharacterReference.gperf" + {"hybull;", "⁃"}, + {""}, + {""}, + {""}, +#line 1556 "HTMLCharacterReference.gperf" + {"notni;", "∌"}, + {""}, + {""}, + {""}, +#line 880 "HTMLCharacterReference.gperf" + {"cire;", "≗"}, +#line 191 "HTMLCharacterReference.gperf" + {"Gamma;", "Γ"}, + {""}, + {""}, + {""}, +#line 2072 "HTMLCharacterReference.gperf" + {"tosa;", "⤩"}, +#line 1276 "HTMLCharacterReference.gperf" + {"lBarr;", "⤎"}, + {""}, + {""}, +#line 1497 "HTMLCharacterReference.gperf" + {"ncongdot;", "⩭̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 640 
"HTMLCharacterReference.gperf" + {"Ycy;", "Ы"}, + {""}, +#line 1005 "HTMLCharacterReference.gperf" + {"eDDot;", "⩷"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 967 "HTMLCharacterReference.gperf" + {"digamma;", "ϝ"}, + {""}, + {""}, +#line 1273 "HTMLCharacterReference.gperf" + {"lAarr;", "⇚"}, + {""}, + {""}, + {""}, +#line 156 "HTMLCharacterReference.gperf" + {"Ecirc", "Ê"}, +#line 157 "HTMLCharacterReference.gperf" + {"Ecirc;", "Ê"}, + {""}, + {""}, + {""}, +#line 1971 "HTMLCharacterReference.gperf" + {"star;", "☆"}, +#line 1018 "HTMLCharacterReference.gperf" + {"efDot;", "≒"}, + {""}, + {""}, + {""}, +#line 1274 "HTMLCharacterReference.gperf" + {"lArr;", "⇐"}, +#line 2202 "HTMLCharacterReference.gperf" + {"xcirc;", "◯"}, + {""}, + {""}, + {""}, + {""}, +#line 1136 "HTMLCharacterReference.gperf" + {"gimel;", "ℷ"}, + {""}, + {""}, + {""}, +#line 1949 "HTMLCharacterReference.gperf" + {"spar;", "∥"}, +#line 517 "HTMLCharacterReference.gperf" + {"ShortRightArrow;", "→"}, + {""}, + {""}, +#line 276 "HTMLCharacterReference.gperf" + {"Lcy;", "Л"}, + {""}, +#line 195 "HTMLCharacterReference.gperf" + {"Gcirc;", "Ĝ"}, + {""}, + {""}, +#line 2002 "HTMLCharacterReference.gperf" + {"sum;", "∑"}, +#line 2149 "HTMLCharacterReference.gperf" + {"vBar;", "⫨"}, + {""}, +#line 964 "HTMLCharacterReference.gperf" + {"diamondsuit;", "♦"}, +#line 240 "HTMLCharacterReference.gperf" + {"Intersection;", "⋂"}, + {""}, + {""}, +#line 2150 "HTMLCharacterReference.gperf" + {"vBarv;", "⫩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1615 "HTMLCharacterReference.gperf" + {"numsp;", " "}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 939 "HTMLCharacterReference.gperf" + {"dArr;", "⇓"}, + {""}, +#line 1459 "HTMLCharacterReference.gperf" + {"models;", "⊧"}, +#line 1129 "HTMLCharacterReference.gperf" + {"gesdoto;", "⪂"}, +#line 2010 "HTMLCharacterReference.gperf" + {"sup;", "⊃"}, +#line 1171 "HTMLCharacterReference.gperf" + {"hArr;", "⇔"}, + {""}, +#line 1770 "HTMLCharacterReference.gperf" + {"quaternions;", "ℍ"}, +#line 1339 "HTMLCharacterReference.gperf" + {"lesdoto;", "⪁"}, +#line 1513 "HTMLCharacterReference.gperf" + {"ngE;", "≧̸"}, + {""}, + {""}, + {""}, + {""}, +#line 2004 "HTMLCharacterReference.gperf" + {"sup1", "¹"}, +#line 2005 "HTMLCharacterReference.gperf" + {"sup1;", "¹"}, +#line 1966 "HTMLCharacterReference.gperf" + {"srarr;", "→"}, + {""}, + {""}, +#line 2006 "HTMLCharacterReference.gperf" + {"sup2", "²"}, +#line 2007 "HTMLCharacterReference.gperf" + {"sup2;", "²"}, + {""}, + {""}, + {""}, +#line 2008 "HTMLCharacterReference.gperf" + {"sup3", "³"}, +#line 2009 "HTMLCharacterReference.gperf" + {"sup3;", "³"}, + {""}, + {""}, + {""}, +#line 327 "HTMLCharacterReference.gperf" + {"Mcy;", "М"}, + {""}, +#line 2033 "HTMLCharacterReference.gperf" + {"swarr;", "↙"}, + {""}, + {""}, +#line 131 "HTMLCharacterReference.gperf" + {"DoubleUpArrow;", "⇑"}, + {""}, + {""}, +#line 67 "HTMLCharacterReference.gperf" + {"Ccaron;", "Č"}, +#line 1564 "HTMLCharacterReference.gperf" + {"npolint;", "⨔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1387 "HTMLCharacterReference.gperf" + {"lowast;", "∗"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 386 "HTMLCharacterReference.gperf" + {"NotSquareSubset;", "⊏̸"}, + {""}, + {""}, + {""}, +#line 2148 "HTMLCharacterReference.gperf" + {"vArr;", "⇕"}, +#line 387 
"HTMLCharacterReference.gperf" + {"NotSquareSubsetEqual;", "⋢"}, + {""}, + {""}, + {""}, +#line 1537 "HTMLCharacterReference.gperf" + {"nleq;", "≰"}, +#line 1538 "HTMLCharacterReference.gperf" + {"nleqq;", "≦̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 207 "HTMLCharacterReference.gperf" + {"GreaterTilde;", "≳"}, +#line 543 "HTMLCharacterReference.gperf" + {"SupersetEqual;", "⊇"}, + {""}, +#line 884 "HTMLCharacterReference.gperf" + {"clubs;", "♣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 105 "HTMLCharacterReference.gperf" + {"Dcy;", "Д"}, + {""}, + {""}, + {""}, +#line 73 "HTMLCharacterReference.gperf" + {"Cedilla;", "¸"}, +#line 659 "HTMLCharacterReference.gperf" + {"acE;", "∾̳"}, + {""}, + {""}, +#line 624 "HTMLCharacterReference.gperf" + {"Vvdash;", "⊪"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 535 "HTMLCharacterReference.gperf" + {"Succeeds;", "≻"}, +#line 1533 "HTMLCharacterReference.gperf" + {"nldr;", "‥"}, + {""}, + {""}, + {""}, + {""}, +#line 258 "HTMLCharacterReference.gperf" + {"KHcy;", "Х"}, +#line 600 "HTMLCharacterReference.gperf" + {"UpperRightArrow;", "↗"}, + {""}, + {""}, + {""}, + {""}, +#line 1214 "HTMLCharacterReference.gperf" + {"iiint;", "∭"}, + {""}, +#line 423 "HTMLCharacterReference.gperf" + {"Or;", "⩔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 211 "HTMLCharacterReference.gperf" + {"Hacek;", "ˇ"}, + {""}, + {""}, +#line 252 "HTMLCharacterReference.gperf" + {"Jcy;", "Й"}, +#line 1084 "HTMLCharacterReference.gperf" + {"fnof;", "ƒ"}, + {""}, +#line 192 "HTMLCharacterReference.gperf" + {"Gammad;", "Ϝ"}, + {""}, + {""}, +#line 2066 "HTMLCharacterReference.gperf" + {"toea;", "⤨"}, + {""}, + {""}, + {""}, +#line 1962 "HTMLCharacterReference.gperf" + {"squ;", "□"}, +#line 28 "HTMLCharacterReference.gperf" + {"Acirc", "Â"}, +#line 29 "HTMLCharacterReference.gperf" + {"Acirc;", "Â"}, +#line 2221 "HTMLCharacterReference.gperf" + {"xuplus;", "⨄"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 95 "HTMLCharacterReference.gperf" + {"CupCap;", "≍"}, + {""}, + {""}, + {""}, + {""}, +#line 2047 "HTMLCharacterReference.gperf" + {"there4;", "∴"}, + {""}, + {""}, +#line 1345 "HTMLCharacterReference.gperf" + {"lesseqgtr;", "⋚"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 440 "HTMLCharacterReference.gperf" + {"Pi;", "Π"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1423 "HTMLCharacterReference.gperf" + {"lurdshar;", "⥊"}, +#line 1149 "HTMLCharacterReference.gperf" + {"gopf;", "𝕘"}, + {""}, + {""}, + {""}, +#line 513 "HTMLCharacterReference.gperf" + {"Scy;", "С"}, +#line 1384 "HTMLCharacterReference.gperf" + {"lopf;", "𝕝"}, + {""}, +#line 976 "HTMLCharacterReference.gperf" + {"dlcrop;", "⌍"}, + {""}, + {""}, +#line 1040 "HTMLCharacterReference.gperf" + {"eopf;", "𝕖"}, + {""}, + {""}, + {""}, + {""}, +#line 898 "HTMLCharacterReference.gperf" + {"copf;", "𝕔"}, + {""}, + {""}, + {""}, + {""}, +#line 703 "HTMLCharacterReference.gperf" + {"aopf;", "𝕒"}, +#line 1912 "HTMLCharacterReference.gperf" + {"sharp;", "♯"}, +#line 760 "HTMLCharacterReference.gperf" + {"bkarow;", "⤍"}, + {""}, + {""}, +#line 1259 "HTMLCharacterReference.gperf" + {"jopf;", "𝕛"}, 
+ {""}, + {""}, +#line 362 "HTMLCharacterReference.gperf" + {"NotGreaterGreater;", "≫̸"}, +#line 1910 "HTMLCharacterReference.gperf" + {"sfr;", "𝔰"}, +#line 589 "HTMLCharacterReference.gperf" + {"Uopf;", "𝕌"}, + {""}, +#line 975 "HTMLCharacterReference.gperf" + {"dlcorn;", "⌞"}, +#line 114 "HTMLCharacterReference.gperf" + {"Diamond;", "⋄"}, + {""}, + {""}, +#line 272 "HTMLCharacterReference.gperf" + {"Laplacetrf;", "ℒ"}, + {""}, + {""}, +#line 430 "HTMLCharacterReference.gperf" + {"Ouml", "Ö"}, +#line 431 "HTMLCharacterReference.gperf" + {"Ouml;", "Ö"}, + {""}, +#line 2215 "HTMLCharacterReference.gperf" + {"xoplus;", "⨁"}, + {""}, + {""}, + {""}, +#line 1486 "HTMLCharacterReference.gperf" + {"natur;", "♮"}, +#line 544 "HTMLCharacterReference.gperf" + {"Supset;", "⋑"}, + {""}, +#line 229 "HTMLCharacterReference.gperf" + {"Icy;", "И"}, +#line 978 "HTMLCharacterReference.gperf" + {"dopf;", "𝕕"}, + {""}, +#line 2220 "HTMLCharacterReference.gperf" + {"xsqcup;", "⨆"}, + {""}, +#line 1056 "HTMLCharacterReference.gperf" + {"eqvparsl;", "⧥"}, +#line 1192 "HTMLCharacterReference.gperf" + {"hopf;", "𝕙"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 299 "HTMLCharacterReference.gperf" + {"LeftVector;", "↼"}, +#line 1968 "HTMLCharacterReference.gperf" + {"ssetmn;", "∖"}, +#line 1633 "HTMLCharacterReference.gperf" + {"nwarrow;", "↖"}, + {""}, + {""}, +#line 360 "HTMLCharacterReference.gperf" + {"NotGreaterEqual;", "≱"}, + {""}, + {""}, +#line 1499 "HTMLCharacterReference.gperf" + {"ncy;", "н"}, + {""}, +#line 2021 "HTMLCharacterReference.gperf" + {"supne;", "⊋"}, + {""}, + {""}, + {""}, + {""}, +#line 2192 "HTMLCharacterReference.gperf" + {"wedge;", "∧"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1213 "HTMLCharacterReference.gperf" + {"iiiint;", "⨌"}, + {""}, + {""}, +#line 458 "HTMLCharacterReference.gperf" + {"Qopf;", "ℚ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1975 "HTMLCharacterReference.gperf" + {"strns;", "¯"}, +#line 2073 "HTMLCharacterReference.gperf" + {"tprime;", "‴"}, + {""}, + {""}, +#line 1085 "HTMLCharacterReference.gperf" + {"fopf;", "𝕗"}, + {""}, + {""}, + {""}, +#line 648 "HTMLCharacterReference.gperf" + {"Zcy;", "З"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2196 "HTMLCharacterReference.gperf" + {"wopf;", "𝕨"}, + {""}, + {""}, +#line 896 "HTMLCharacterReference.gperf" + {"congdot;", "⩭"}, + {""}, + {""}, +#line 625 "HTMLCharacterReference.gperf" + {"Wcirc;", "Ŵ"}, + {""}, + {""}, + {""}, +#line 2181 "HTMLCharacterReference.gperf" + {"vopf;", "𝕧"}, +#line 785 "HTMLCharacterReference.gperf" + {"boxHU;", "╩"}, +#line 1576 "HTMLCharacterReference.gperf" + {"nrtrie;", "⋭"}, +#line 750 "HTMLCharacterReference.gperf" + {"bigodot;", "⨀"}, + {""}, + {""}, +#line 186 "HTMLCharacterReference.gperf" + {"Fouriertrf;", "ℱ"}, + {""}, + {""}, + {""}, + {""}, +#line 1216 "HTMLCharacterReference.gperf" + {"iiota;", "℩"}, + {""}, + {""}, +#line 1281 "HTMLCharacterReference.gperf" + {"laemptyv;", "⦴"}, +#line 1284 "HTMLCharacterReference.gperf" + {"lang;", "⟨"}, + {""}, +#line 699 "HTMLCharacterReference.gperf" + {"angsph;", "∢"}, + {""}, + {""}, +#line 134 "HTMLCharacterReference.gperf" + {"DownArrow;", "↓"}, +#line 970 "HTMLCharacterReference.gperf" + {"divide", "÷"}, +#line 971 "HTMLCharacterReference.gperf" + {"divide;", "÷"}, + {""}, +#line 725 "HTMLCharacterReference.gperf" + {"backcong;", "≌"}, +#line 775 "HTMLCharacterReference.gperf" + {"bopf;", "𝕓"}, +#line 451 "HTMLCharacterReference.gperf" + {"Proportion;", "∷"}, + {""}, + {""}, 
+#line 1089 "HTMLCharacterReference.gperf" + {"fpartint;", "⨍"}, +#line 1539 "HTMLCharacterReference.gperf" + {"nleqslant;", "⩽̸"}, + {""}, +#line 1293 "HTMLCharacterReference.gperf" + {"larrfs;", "⤝"}, +#line 135 "HTMLCharacterReference.gperf" + {"DownArrowBar;", "⤓"}, + {""}, +#line 1036 "HTMLCharacterReference.gperf" + {"emsp;", " "}, + {""}, + {""}, +#line 677 "HTMLCharacterReference.gperf" + {"amp", "&"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 678 "HTMLCharacterReference.gperf" + {"amp;", "&"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 60 "HTMLCharacterReference.gperf" + {"CHcy;", "Ч"}, + {""}, +#line 1035 "HTMLCharacterReference.gperf" + {"emsp14;", " "}, + {""}, + {""}, +#line 2048 "HTMLCharacterReference.gperf" + {"therefore;", "∴"}, + {""}, +#line 2193 "HTMLCharacterReference.gperf" + {"wedgeq;", "≙"}, +#line 1553 "HTMLCharacterReference.gperf" + {"notinva;", "∉"}, + {""}, +#line 1068 "HTMLCharacterReference.gperf" + {"excl;", "!"}, + {""}, + {""}, + {""}, + {""}, +#line 1909 "HTMLCharacterReference.gperf" + {"sext;", "✶"}, + {""}, +#line 505 "HTMLCharacterReference.gperf" + {"SHCHcy;", "Щ"}, + {""}, +#line 469 "HTMLCharacterReference.gperf" + {"Rcy;", "Р"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2020 "HTMLCharacterReference.gperf" + {"supnE;", "⫌"}, + {""}, + {""}, +#line 158 "HTMLCharacterReference.gperf" + {"Ecy;", "Э"}, +#line 216 "HTMLCharacterReference.gperf" + {"Hopf;", "ℍ"}, + {""}, + {""}, + {""}, + {""}, +#line 184 "HTMLCharacterReference.gperf" + {"Fopf;", "𝔽"}, + {""}, + {""}, + {""}, + {""}, +#line 1905 "HTMLCharacterReference.gperf" + {"semi;", ";"}, + {""}, +#line 1034 "HTMLCharacterReference.gperf" + {"emsp13;", " "}, + {""}, + {""}, +#line 1238 "HTMLCharacterReference.gperf" + {"iopf;", "𝕚"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 196 "HTMLCharacterReference.gperf" + {"Gcy;", "Г"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1272 "HTMLCharacterReference.gperf" + {"kscr;", "𝓀"}, +#line 812 "HTMLCharacterReference.gperf" + {"boxuL;", "╛"}, + {""}, +#line 1862 "HTMLCharacterReference.gperf" + {"rpar;", ")"}, +#line 1361 "HTMLCharacterReference.gperf" + {"llcorner;", "⌞"}, +#line 642 "HTMLCharacterReference.gperf" + {"Yopf;", "𝕐"}, + {""}, +#line 1189 "HTMLCharacterReference.gperf" + {"homtht;", "∻"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 847 "HTMLCharacterReference.gperf" + {"caps;", "∩︀"}, +#line 2157 "HTMLCharacterReference.gperf" + {"varpi;", "ϖ"}, +#line 846 "HTMLCharacterReference.gperf" + {"capdot;", "⩀"}, + {""}, + {""}, + {""}, + {""}, +#line 614 "HTMLCharacterReference.gperf" + {"Verbar;", "‖"}, + {""}, +#line 414 "HTMLCharacterReference.gperf" + {"Ofr;", "𝔒"}, + {""}, + {""}, + {""}, + {""}, +#line 682 "HTMLCharacterReference.gperf" + {"andslope;", "⩘"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1570 "HTMLCharacterReference.gperf" + {"nrArr;", "⇏"}, + {""}, + {""}, + {""}, + {""}, +#line 1901 "HTMLCharacterReference.gperf" + {"searr;", "↘"}, +#line 1409 "HTMLCharacterReference.gperf" + {"lstrok;", "ł"}, +#line 2022 "HTMLCharacterReference.gperf" + {"supplus;", "⫀"}, + {""}, +#line 319 "HTMLCharacterReference.gperf" + {"Lopf;", "𝕃"}, +#line 1908 "HTMLCharacterReference.gperf" + {"setmn;", "∖"}, + {""}, + {""}, + {""}, + {""}, +#line 1630 "HTMLCharacterReference.gperf" + {"nwArr;", "⇖"}, + {""}, +#line 1874 "HTMLCharacterReference.gperf" + 
{"rtri;", "▹"}, +#line 1865 "HTMLCharacterReference.gperf" + {"rrarr;", "⇉"}, +#line 49 "HTMLCharacterReference.gperf" + {"Barv;", "⫧"}, +#line 886 "HTMLCharacterReference.gperf" + {"colon;", ":"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 550 "HTMLCharacterReference.gperf" + {"Tab;", "\t"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1300 "HTMLCharacterReference.gperf" + {"latail;", "⤙"}, + {""}, +#line 1806 "HTMLCharacterReference.gperf" + {"rbarr;", "⤍"}, +#line 1767 "HTMLCharacterReference.gperf" + {"qopf;", "𝕢"}, + {""}, + {""}, +#line 163 "HTMLCharacterReference.gperf" + {"Element;", "∈"}, + {""}, + {""}, + {""}, +#line 1970 "HTMLCharacterReference.gperf" + {"sstarf;", "⋆"}, + {""}, + {""}, + {""}, + {""}, +#line 996 "HTMLCharacterReference.gperf" + {"dstrok;", "đ"}, + {""}, + {""}, + {""}, + {""}, +#line 1196 "HTMLCharacterReference.gperf" + {"hstrok;", "ħ"}, + {""}, + {""}, +#line 834 "HTMLCharacterReference.gperf" + {"bull;", "•"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 332 "HTMLCharacterReference.gperf" + {"Mopf;", "𝕄"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1350 "HTMLCharacterReference.gperf" + {"lfloor;", "⌊"}, + {""}, +#line 339 "HTMLCharacterReference.gperf" + {"Ncy;", "Н"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2068 "HTMLCharacterReference.gperf" + {"topbot;", "⌶"}, + {""}, + {""}, + {""}, +#line 1030 "HTMLCharacterReference.gperf" + {"emacr;", "ē"}, +#line 1051 "HTMLCharacterReference.gperf" + {"eqslantless;", "⪕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 291 "HTMLCharacterReference.gperf" + {"LeftTeeVector;", "⥚"}, +#line 804 "HTMLCharacterReference.gperf" + {"boxh;", "─"}, +#line 675 "HTMLCharacterReference.gperf" + {"amacr;", "ā"}, +#line 1836 "HTMLCharacterReference.gperf" + {"rho;", "ρ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1071 "HTMLCharacterReference.gperf" + {"exponentiale;", "ⅇ"}, + {""}, + {""}, +#line 581 "HTMLCharacterReference.gperf" + {"Umacr;", "Ū"}, + {""}, + {""}, +#line 2154 "HTMLCharacterReference.gperf" + {"varkappa;", "ϰ"}, + {""}, + {""}, +#line 1417 "HTMLCharacterReference.gperf" + {"ltlarr;", "⥶"}, + {""}, + {""}, + {""}, + {""}, +#line 1198 "HTMLCharacterReference.gperf" + {"hyphen;", "‐"}, + {""}, + {""}, + {""}, +#line 2228 "HTMLCharacterReference.gperf" + {"ycirc;", "ŷ"}, +#line 1283 "HTMLCharacterReference.gperf" + {"lambda;", "λ"}, + {""}, + {""}, + {""}, + {""}, +#line 941 "HTMLCharacterReference.gperf" + {"dagger;", "†"}, + {""}, + {""}, +#line 271 "HTMLCharacterReference.gperf" + {"Lang;", "⟪"}, +#line 460 "HTMLCharacterReference.gperf" + {"RBarr;", "⤐"}, + {""}, + {""}, +#line 833 "HTMLCharacterReference.gperf" + {"bsolhsub;", "⟈"}, +#line 224 "HTMLCharacterReference.gperf" + {"IOcy;", "Ё"}, +#line 2217 "HTMLCharacterReference.gperf" + {"xrArr;", "⟹"}, +#line 747 "HTMLCharacterReference.gperf" + {"bigcap;", "⋂"}, + {""}, +#line 30 "HTMLCharacterReference.gperf" + {"Acy;", "А"}, +#line 116 "HTMLCharacterReference.gperf" + {"Dopf;", "𝔻"}, + {""}, + {""}, + {""}, +#line 1856 "HTMLCharacterReference.gperf" + {"roarr;", "⇾"}, + {""}, + {""}, + {""}, + {""}, +#line 1319 "HTMLCharacterReference.gperf" + {"ldrushar;", "⥋"}, +#line 90 "HTMLCharacterReference.gperf" + {"Coproduct;", "∐"}, +#line 1346 "HTMLCharacterReference.gperf" + {"lesseqqgtr;", "⪋"}, +#line 1964 "HTMLCharacterReference.gperf" + {"squarf;", "▪"}, + {""}, + {""}, + {""}, +#line 558 
"HTMLCharacterReference.gperf" + {"ThickSpace;", "  "}, +#line 1248 "HTMLCharacterReference.gperf" + {"isinsv;", "⋳"}, + {""}, +#line 448 "HTMLCharacterReference.gperf" + {"PrecedesTilde;", "≾"}, + {""}, + {""}, +#line 973 "HTMLCharacterReference.gperf" + {"divonx;", "⋇"}, + {""}, +#line 2130 "HTMLCharacterReference.gperf" + {"upsi;", "υ"}, +#line 2044 "HTMLCharacterReference.gperf" + {"tdot;", "⃛"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1522 "HTMLCharacterReference.gperf" + {"nhArr;", "⇎"}, +#line 845 "HTMLCharacterReference.gperf" + {"capcup;", "⩇"}, + {""}, + {""}, +#line 1170 "HTMLCharacterReference.gperf" + {"gvnE;", "≩︀"}, + {""}, +#line 915 "HTMLCharacterReference.gperf" + {"cularr;", "↶"}, + {""}, + {""}, +#line 1426 "HTMLCharacterReference.gperf" + {"lvnE;", "≨︀"}, + {""}, +#line 379 "HTMLCharacterReference.gperf" + {"NotPrecedes;", "⊀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1233 "HTMLCharacterReference.gperf" + {"intercal;", "⊺"}, +#line 254 "HTMLCharacterReference.gperf" + {"Jopf;", "𝕁"}, + {""}, + {""}, + {""}, +#line 1428 "HTMLCharacterReference.gperf" + {"macr", "¯"}, +#line 1429 "HTMLCharacterReference.gperf" + {"macr;", "¯"}, + {""}, +#line 219 "HTMLCharacterReference.gperf" + {"Hstrok;", "Ħ"}, + {""}, +#line 1875 "HTMLCharacterReference.gperf" + {"rtrie;", "⊵"}, +#line 1518 "HTMLCharacterReference.gperf" + {"nges;", "⩾̸"}, + {""}, + {""}, +#line 461 "HTMLCharacterReference.gperf" + {"REG", "®"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 462 "HTMLCharacterReference.gperf" + {"REG;", "®"}, +#line 632 "HTMLCharacterReference.gperf" + {"Xopf;", "𝕏"}, + {""}, + {""}, + {""}, +#line 1672 "HTMLCharacterReference.gperf" + {"or;", "∨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1810 "HTMLCharacterReference.gperf" + {"rbrke;", "⦌"}, +#line 1191 "HTMLCharacterReference.gperf" + {"hookrightarrow;", "↪"}, + {""}, +#line 2187 "HTMLCharacterReference.gperf" + {"vsupnE;", "⫌︀"}, + {""}, + {""}, + {""}, +#line 935 "HTMLCharacterReference.gperf" + {"cuwed;", "⋏"}, + {""}, +#line 1344 "HTMLCharacterReference.gperf" + {"lessdot;", "⋖"}, +#line 446 "HTMLCharacterReference.gperf" + {"PrecedesEqual;", "⪯"}, + {""}, + {""}, +#line 2152 "HTMLCharacterReference.gperf" + {"vangrt;", "⦜"}, + {""}, + {""}, + {""}, + {""}, +#line 1832 "HTMLCharacterReference.gperf" + {"rfr;", "𝔯"}, + {""}, + {""}, +#line 1684 "HTMLCharacterReference.gperf" + {"orv;", "⩛"}, + {""}, +#line 758 "HTMLCharacterReference.gperf" + {"bigvee;", "⋁"}, +#line 1505 "HTMLCharacterReference.gperf" + {"nearrow;", "↗"}, +#line 2142 "HTMLCharacterReference.gperf" + {"utri;", "▵"}, + {""}, + {""}, + {""}, +#line 2145 "HTMLCharacterReference.gperf" + {"uuml", "ü"}, +#line 2146 "HTMLCharacterReference.gperf" + {"uuml;", "ü"}, +#line 521 "HTMLCharacterReference.gperf" + {"Sopf;", "𝕊"}, +#line 1682 "HTMLCharacterReference.gperf" + {"oror;", "⩖"}, +#line 1566 "HTMLCharacterReference.gperf" + {"nprcue;", "⋠"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1780 "HTMLCharacterReference.gperf" + {"rHar;", "⥤"}, + {""}, + {""}, + {""}, +#line 874 "HTMLCharacterReference.gperf" + {"circlearrowright;", "↻"}, + {""}, + {""}, +#line 2089 "HTMLCharacterReference.gperf" + {"tscr;", "𝓉"}, + {""}, + {""}, + {""}, +#line 876 "HTMLCharacterReference.gperf" + {"circledS;", "Ⓢ"}, +#line 1391 "HTMLCharacterReference.gperf" + {"lozf;", "⧫"}, +#line 1669 "HTMLCharacterReference.gperf" + {"opar;", "⦷"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 324 "HTMLCharacterReference.gperf" + 
{"Lstrok;", "Ł"}, +#line 1773 "HTMLCharacterReference.gperf" + {"questeq;", "≟"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1026 "HTMLCharacterReference.gperf" + {"elinters;", "⏧"}, +#line 2212 "HTMLCharacterReference.gperf" + {"xnis;", "⋻"}, +#line 1379 "HTMLCharacterReference.gperf" + {"longmapsto;", "⟼"}, + {""}, + {""}, + {""}, +#line 2144 "HTMLCharacterReference.gperf" + {"uuarr;", "⇈"}, +#line 1218 "HTMLCharacterReference.gperf" + {"imacr;", "ī"}, + {""}, +#line 297 "HTMLCharacterReference.gperf" + {"LeftUpVector;", "↿"}, + {""}, +#line 244 "HTMLCharacterReference.gperf" + {"Iopf;", "𝕀"}, +#line 260 "HTMLCharacterReference.gperf" + {"Kappa;", "Κ"}, + {""}, + {""}, +#line 1122 "HTMLCharacterReference.gperf" + {"gel;", "⋛"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2014 "HTMLCharacterReference.gperf" + {"supe;", "⊇"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2206 "HTMLCharacterReference.gperf" + {"xhArr;", "⟺"}, +#line 1969 "HTMLCharacterReference.gperf" + {"ssmile;", "⌣"}, + {""}, + {""}, +#line 1547 "HTMLCharacterReference.gperf" + {"nopf;", "𝕟"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1673 "HTMLCharacterReference.gperf" + {"orarr;", "↻"}, + {""}, + {""}, +#line 1693 "HTMLCharacterReference.gperf" + {"ouml", "ö"}, +#line 1694 "HTMLCharacterReference.gperf" + {"ouml;", "ö"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1269 "HTMLCharacterReference.gperf" + {"khcy;", "х"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 519 "HTMLCharacterReference.gperf" + {"Sigma;", "Σ"}, + {""}, +#line 2085 "HTMLCharacterReference.gperf" + {"triplus;", "⨹"}, +#line 1468 "HTMLCharacterReference.gperf" + {"nGt;", "≫⃒"}, +#line 2227 "HTMLCharacterReference.gperf" + {"yacy;", "я"}, + {""}, +#line 2054 "HTMLCharacterReference.gperf" + {"thinsp;", " "}, + {""}, + {""}, +#line 653 "HTMLCharacterReference.gperf" + {"Zopf;", "ℤ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1070 "HTMLCharacterReference.gperf" + {"expectation;", "ℰ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1688 "HTMLCharacterReference.gperf" + {"osol;", "⊘"}, + {""}, + {""}, + {""}, +#line 986 "HTMLCharacterReference.gperf" + {"downarrow;", "↓"}, + {""}, + {""}, + {""}, +#line 554 "HTMLCharacterReference.gperf" + {"Tcy;", "Т"}, + {""}, +#line 947 "HTMLCharacterReference.gperf" + {"dblac;", "˝"}, +#line 1109 "HTMLCharacterReference.gperf" + {"frown;", "⌢"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 234 "HTMLCharacterReference.gperf" + {"Im;", "ℑ"}, +#line 1858 "HTMLCharacterReference.gperf" + {"ropar;", "⦆"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 270 "HTMLCharacterReference.gperf" + {"Lambda;", "Λ"}, + {""}, +#line 437 "HTMLCharacterReference.gperf" + {"Pcy;", "П"}, + {""}, +#line 1343 "HTMLCharacterReference.gperf" + {"lessapprox;", "⪅"}, + {""}, + {""}, +#line 1635 "HTMLCharacterReference.gperf" + {"oS;", "Ⓢ"}, + {""}, + {""}, + {""}, + {""}, +#line 2241 "HTMLCharacterReference.gperf" + {"zcy;", "з"}, +#line 1656 "HTMLCharacterReference.gperf" + {"ohm;", "Ω"}, +#line 820 "HTMLCharacterReference.gperf" + {"boxvh;", "┼"}, +#line 149 "HTMLCharacterReference.gperf" + {"Dstrok;", "Đ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2114 "HTMLCharacterReference.gperf" + {"uharr;", "↾"}, + {""}, +#line 2041 "HTMLCharacterReference.gperf" + {"tcaron;", "ť"}, + {""}, +#line 885 "HTMLCharacterReference.gperf" + {"clubsuit;", 
"♣"}, + {""}, + {""}, +#line 1306 "HTMLCharacterReference.gperf" + {"lbrack;", "["}, + {""}, + {""}, +#line 499 "HTMLCharacterReference.gperf" + {"Ropf;", "ℝ"}, + {""}, +#line 1458 "HTMLCharacterReference.gperf" + {"mnplus;", "∓"}, + {""}, +#line 2039 "HTMLCharacterReference.gperf" + {"tau;", "τ"}, + {""}, + {""}, +#line 577 "HTMLCharacterReference.gperf" + {"Udblac;", "Ű"}, + {""}, + {""}, +#line 1480 "HTMLCharacterReference.gperf" + {"nang;", "∠⃒"}, + {""}, + {""}, + {""}, + {""}, +#line 168 "HTMLCharacterReference.gperf" + {"Eopf;", "𝔼"}, +#line 2204 "HTMLCharacterReference.gperf" + {"xdtri;", "▽"}, + {""}, + {""}, +#line 61 "HTMLCharacterReference.gperf" + {"COPY", "©"}, +#line 62 "HTMLCharacterReference.gperf" + {"COPY;", "©"}, + {""}, +#line 835 "HTMLCharacterReference.gperf" + {"bullet;", "•"}, + {""}, + {""}, +#line 2214 "HTMLCharacterReference.gperf" + {"xopf;", "𝕩"}, + {""}, +#line 1268 "HTMLCharacterReference.gperf" + {"kgreen;", "ĸ"}, +#line 1823 "HTMLCharacterReference.gperf" + {"real;", "ℜ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1139 "HTMLCharacterReference.gperf" + {"glE;", "⪒"}, + {""}, + {""}, + {""}, + {""}, +#line 1916 "HTMLCharacterReference.gperf" + {"shortparallel;", "∥"}, +#line 200 "HTMLCharacterReference.gperf" + {"Gopf;", "𝔾"}, +#line 1502 "HTMLCharacterReference.gperf" + {"neArr;", "⇗"}, + {""}, +#line 2110 "HTMLCharacterReference.gperf" + {"ufr;", "𝔲"}, +#line 1467 "HTMLCharacterReference.gperf" + {"nGg;", "⋙̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1285 "HTMLCharacterReference.gperf" + {"langd;", "⦑"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 744 "HTMLCharacterReference.gperf" + {"beth;", "ℶ"}, + {""}, + {""}, + {""}, +#line 2097 "HTMLCharacterReference.gperf" + {"uHar;", "⥣"}, + {""}, + {""}, +#line 101 "HTMLCharacterReference.gperf" + {"Dagger;", "‡"}, +#line 916 "HTMLCharacterReference.gperf" + {"cularrp;", "⤽"}, +#line 610 "HTMLCharacterReference.gperf" + {"Vcy;", "В"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2123 "HTMLCharacterReference.gperf" + {"uogon;", "ų"}, + {""}, + {""}, + {""}, + {""}, +#line 2113 "HTMLCharacterReference.gperf" + {"uharl;", "↿"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1447 "HTMLCharacterReference.gperf" + {"mid;", "∣"}, +#line 1896 "HTMLCharacterReference.gperf" + {"sdot;", "⋅"}, +#line 70 "HTMLCharacterReference.gperf" + {"Ccirc;", "Ĉ"}, + {""}, + {""}, + {""}, +#line 545 "HTMLCharacterReference.gperf" + {"THORN", "Þ"}, +#line 546 "HTMLCharacterReference.gperf" + {"THORN;", "Þ"}, +#line 1439 "HTMLCharacterReference.gperf" + {"mcomma;", "⨩"}, + {""}, + {""}, +#line 559 "HTMLCharacterReference.gperf" + {"ThinSpace;", " "}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 572 "HTMLCharacterReference.gperf" + {"Ubrcy;", "Ў"}, + {""}, + {""}, +#line 1650 "HTMLCharacterReference.gperf" + {"ofr;", "𝔬"}, + {""}, +#line 1551 "HTMLCharacterReference.gperf" + {"notinE;", "⋹̸"}, + {""}, + {""}, +#line 2090 "HTMLCharacterReference.gperf" + {"tscy;", "ц"}, + {""}, + {""}, + {""}, + {""}, +#line 464 "HTMLCharacterReference.gperf" + {"Rang;", "⟫"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1828 "HTMLCharacterReference.gperf" + {"reg", "®"}, + {""}, + {""}, + {""}, +#line 1457 "HTMLCharacterReference.gperf" + {"mldr;", "…"}, + {""}, +#line 1829 "HTMLCharacterReference.gperf" + {"reg;", "®"}, + {""}, + {""}, + {""}, +#line 88 "HTMLCharacterReference.gperf" + 
{"ContourIntegral;", "∮"}, + {""}, + {""}, + {""}, + {""}, +#line 2049 "HTMLCharacterReference.gperf" + {"theta;", "θ"}, + {""}, + {""}, +#line 1854 "HTMLCharacterReference.gperf" + {"rnmid;", "⫮"}, + {""}, +#line 800 "HTMLCharacterReference.gperf" + {"boxdL;", "╕"}, +#line 1670 "HTMLCharacterReference.gperf" + {"operp;", "⦹"}, +#line 697 "HTMLCharacterReference.gperf" + {"angrtvb;", "⊾"}, +#line 2229 "HTMLCharacterReference.gperf" + {"ycy;", "ы"}, + {""}, +#line 1478 "HTMLCharacterReference.gperf" + {"nabla;", "∇"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1963 "HTMLCharacterReference.gperf" + {"square;", "□"}, + {""}, + {""}, +#line 350 "HTMLCharacterReference.gperf" + {"Nopf;", "ℕ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1347 "HTMLCharacterReference.gperf" + {"lessgtr;", "≶"}, + {""}, + {""}, + {""}, +#line 1388 "HTMLCharacterReference.gperf" + {"lowbar;", "_"}, +#line 1390 "HTMLCharacterReference.gperf" + {"lozenge;", "◊"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1625 "HTMLCharacterReference.gperf" + {"nvlt;", "<⃒"}, + {""}, +#line 1286 "HTMLCharacterReference.gperf" + {"langle;", "⟨"}, + {""}, + {""}, +#line 2211 "HTMLCharacterReference.gperf" + {"xmap;", "⟼"}, + {""}, + {""}, +#line 1869 "HTMLCharacterReference.gperf" + {"rsqb;", "]"}, + {""}, +#line 1967 "HTMLCharacterReference.gperf" + {"sscr;", "𝓈"}, + {""}, +#line 938 "HTMLCharacterReference.gperf" + {"cylcty;", "⌭"}, + {""}, +#line 1377 "HTMLCharacterReference.gperf" + {"longleftarrow;", "⟵"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2140 "HTMLCharacterReference.gperf" + {"utdot;", "⋰"}, + {""}, +#line 887 "HTMLCharacterReference.gperf" + {"colone;", "≔"}, + {""}, + {""}, +#line 2083 "HTMLCharacterReference.gperf" + {"trie;", "≜"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 235 "HTMLCharacterReference.gperf" + {"Imacr;", "Ī"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 972 "HTMLCharacterReference.gperf" + {"divideontimes;", "⋇"}, + {""}, +#line 787 "HTMLCharacterReference.gperf" + {"boxHu;", "╧"}, + {""}, +#line 518 "HTMLCharacterReference.gperf" + {"ShortUpArrow;", "↑"}, + {""}, +#line 38 "HTMLCharacterReference.gperf" + {"Aopf;", "𝔸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1826 "HTMLCharacterReference.gperf" + {"reals;", "ℝ"}, + {""}, + {""}, +#line 1175 "HTMLCharacterReference.gperf" + {"hardcy;", "ъ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 429 "HTMLCharacterReference.gperf" + {"Otimes;", "⨷"}, +#line 1837 "HTMLCharacterReference.gperf" + {"rhov;", "ϱ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 796 "HTMLCharacterReference.gperf" + {"boxVh;", "╫"}, + {""}, +#line 292 "HTMLCharacterReference.gperf" + {"LeftTriangle;", "⊲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1674 "HTMLCharacterReference.gperf" + {"ord;", "⩝"}, +#line 676 "HTMLCharacterReference.gperf" + {"amalg;", "⨿"}, + {""}, +#line 1882 "HTMLCharacterReference.gperf" + {"sc;", "≻"}, + {""}, +#line 1679 "HTMLCharacterReference.gperf" + {"ordm", "º"}, +#line 1680 "HTMLCharacterReference.gperf" + {"ordm;", "º"}, + {""}, + {""}, + {""}, + {""}, +#line 1898 "HTMLCharacterReference.gperf" + {"sdote;", "⩦"}, + {""}, + {""}, +#line 106 "HTMLCharacterReference.gperf" + {"Del;", "∇"}, + {""}, + {""}, + {""}, + {""}, +#line 1440 "HTMLCharacterReference.gperf" + {"mcy;", "м"}, + {""}, +#line 807 "HTMLCharacterReference.gperf" + {"boxhd;", "┬"}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 147 "HTMLCharacterReference.gperf" + {"Downarrow;", "⇓"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1894 "HTMLCharacterReference.gperf" + {"scsim;", "≿"}, + {""}, + {""}, +#line 262 "HTMLCharacterReference.gperf" + {"Kcy;", "К"}, +#line 1994 "HTMLCharacterReference.gperf" + {"succ;", "≻"}, + {""}, + {""}, +#line 294 "HTMLCharacterReference.gperf" + {"LeftTriangleEqual;", "⊴"}, + {""}, + {""}, +#line 364 "HTMLCharacterReference.gperf" + {"NotGreaterSlantEqual;", "⩾̸"}, + {""}, + {""}, + {""}, + {""}, +#line 1812 "HTMLCharacterReference.gperf" + {"rbrkslu;", "⦐"}, +#line 210 "HTMLCharacterReference.gperf" + {"HARDcy;", "Ъ"}, + {""}, +#line 1833 "HTMLCharacterReference.gperf" + {"rhard;", "⇁"}, + {""}, + {""}, +#line 738 "HTMLCharacterReference.gperf" + {"becaus;", "∵"}, + {""}, +#line 1976 "HTMLCharacterReference.gperf" + {"sub;", "⊂"}, + {""}, + {""}, +#line 2132 "HTMLCharacterReference.gperf" + {"upsilon;", "υ"}, + {""}, + {""}, + {""}, +#line 92 "HTMLCharacterReference.gperf" + {"Cross;", "⨯"}, + {""}, + {""}, +#line 2250 "HTMLCharacterReference.gperf" + {"zwj;", "‍"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1363 "HTMLCharacterReference.gperf" + {"lltri;", "◺"}, + {""}, + {""}, + {""}, +#line 361 "HTMLCharacterReference.gperf" + {"NotGreaterFullEqual;", "≧̸"}, + {""}, +#line 1885 "HTMLCharacterReference.gperf" + {"scaron;", "š"}, + {""}, +#line 875 "HTMLCharacterReference.gperf" + {"circledR;", "®"}, +#line 1884 "HTMLCharacterReference.gperf" + {"scap;", "⪸"}, +#line 626 "HTMLCharacterReference.gperf" + {"Wedge;", "⋀"}, + {""}, + {""}, +#line 757 "HTMLCharacterReference.gperf" + {"biguplus;", "⨄"}, + {""}, + {""}, +#line 2185 "HTMLCharacterReference.gperf" + {"vsubnE;", "⫋︀"}, + {""}, +#line 1683 "HTMLCharacterReference.gperf" + {"orslope;", "⩗"}, + {""}, +#line 164 "HTMLCharacterReference.gperf" + {"Emacr;", "Ē"}, + {""}, + {""}, +#line 239 "HTMLCharacterReference.gperf" + {"Integral;", "∫"}, +#line 48 "HTMLCharacterReference.gperf" + {"Backslash;", "∖"}, + {""}, + {""}, + {""}, + {""}, +#line 126 "HTMLCharacterReference.gperf" + {"DoubleLongLeftArrow;", "⟸"}, + {""}, + {""}, + {""}, +#line 1472 "HTMLCharacterReference.gperf" + {"nLl;", "⋘̸"}, +#line 127 "HTMLCharacterReference.gperf" + {"DoubleLongLeftRightArrow;", "⟺"}, +#line 123 "HTMLCharacterReference.gperf" + {"DoubleLeftArrow;", "⇐"}, + {""}, + {""}, +#line 1125 "HTMLCharacterReference.gperf" + {"geqslant;", "⩾"}, + {""}, + {""}, +#line 1113 "HTMLCharacterReference.gperf" + {"gacute;", "ǵ"}, + {""}, +#line 1335 "HTMLCharacterReference.gperf" + {"leqslant;", "⩽"}, +#line 628 "HTMLCharacterReference.gperf" + {"Wopf;", "𝕎"}, + {""}, +#line 1280 "HTMLCharacterReference.gperf" + {"lacute;", "ĺ"}, + {""}, + {""}, + {""}, +#line 1007 "HTMLCharacterReference.gperf" + {"eacute", "é"}, +#line 1008 "HTMLCharacterReference.gperf" + {"eacute;", "é"}, + {""}, + {""}, + {""}, + {""}, +#line 840 "HTMLCharacterReference.gperf" + {"cacute;", "ć"}, + {""}, + {""}, + {""}, +#line 655 "HTMLCharacterReference.gperf" + {"aacute", "á"}, +#line 656 "HTMLCharacterReference.gperf" + {"aacute;", "á"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 568 "HTMLCharacterReference.gperf" + {"Uacute", "Ú"}, +#line 569 "HTMLCharacterReference.gperf" + {"Uacute;", "Ú"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 424 "HTMLCharacterReference.gperf" + {"Oscr;", "𝒪"}, + 
{""}, + {""}, + {""}, +#line 936 "HTMLCharacterReference.gperf" + {"cwconint;", "∲"}, + {""}, + {""}, + {""}, + {""}, +#line 722 "HTMLCharacterReference.gperf" + {"awconint;", "∳"}, +#line 345 "HTMLCharacterReference.gperf" + {"NestedLessLess;", "≪"}, +#line 769 "HTMLCharacterReference.gperf" + {"blk14;", "░"}, + {""}, + {""}, +#line 398 "HTMLCharacterReference.gperf" + {"NotTilde;", "≁"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 770 "HTMLCharacterReference.gperf" + {"blk34;", "▓"}, + {""}, + {""}, + {""}, +#line 1654 "HTMLCharacterReference.gperf" + {"ogt;", "⧁"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2178 "HTMLCharacterReference.gperf" + {"vltri;", "⊲"}, + {""}, + {""}, + {""}, +#line 564 "HTMLCharacterReference.gperf" + {"Topf;", "𝕋"}, +#line 1950 "HTMLCharacterReference.gperf" + {"sqcap;", "⊓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1736 "HTMLCharacterReference.gperf" + {"pr;", "≺"}, + {""}, + {""}, + {""}, + {""}, +#line 316 "HTMLCharacterReference.gperf" + {"Longleftarrow;", "⟸"}, + {""}, +#line 768 "HTMLCharacterReference.gperf" + {"blk12;", "▒"}, +#line 1029 "HTMLCharacterReference.gperf" + {"elsdot;", "⪗"}, + {""}, + {""}, +#line 1320 "HTMLCharacterReference.gperf" + {"ldsh;", "↲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 888 "HTMLCharacterReference.gperf" + {"coloneq;", "≔"}, + {""}, +#line 443 "HTMLCharacterReference.gperf" + {"Popf;", "ℙ"}, + {""}, + {""}, + {""}, + {""}, +#line 1469 "HTMLCharacterReference.gperf" + {"nGtv;", "≫̸"}, + {""}, + {""}, +#line 911 "HTMLCharacterReference.gperf" + {"cudarrl;", "⤸"}, + {""}, +#line 2248 "HTMLCharacterReference.gperf" + {"zopf;", "𝕫"}, + {""}, +#line 2224 "HTMLCharacterReference.gperf" + {"xwedge;", "⋀"}, + {""}, + {""}, +#line 1873 "HTMLCharacterReference.gperf" + {"rtimes;", "⋊"}, +#line 1651 "HTMLCharacterReference.gperf" + {"ogon;", "˛"}, +#line 1760 "HTMLCharacterReference.gperf" + {"prsim;", "≾"}, + {""}, + {""}, +#line 1270 "HTMLCharacterReference.gperf" + {"kjcy;", "ќ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1951 "HTMLCharacterReference.gperf" + {"sqcaps;", "⊓︀"}, + {""}, +#line 1887 "HTMLCharacterReference.gperf" + {"sce;", "⪰"}, +#line 86 "HTMLCharacterReference.gperf" + {"Congruent;", "≡"}, +#line 1427 "HTMLCharacterReference.gperf" + {"mDDot;", "∺"}, + {""}, +#line 761 "HTMLCharacterReference.gperf" + {"blacklozenge;", "⧫"}, +#line 1855 "HTMLCharacterReference.gperf" + {"roang;", "⟭"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1076 "HTMLCharacterReference.gperf" + {"fflig;", "ff"}, + {""}, +#line 1555 "HTMLCharacterReference.gperf" + {"notinvc;", "⋶"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1863 "HTMLCharacterReference.gperf" + {"rpargt;", "⦔"}, +#line 1758 "HTMLCharacterReference.gperf" + {"prop;", "∝"}, + {""}, + {""}, +#line 51 "HTMLCharacterReference.gperf" + {"Bcy;", "Б"}, + {""}, +#line 1881 "HTMLCharacterReference.gperf" + {"sbquo;", "‚"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1738 "HTMLCharacterReference.gperf" + {"prap;", "⪷"}, + {""}, + {""}, + {""}, +#line 634 "HTMLCharacterReference.gperf" + {"YAcy;", "Я"}, + {""}, +#line 2160 "HTMLCharacterReference.gperf" + {"varrho;", "ϱ"}, + {""}, +#line 1903 "HTMLCharacterReference.gperf" + {"sect", "§"}, +#line 1904 "HTMLCharacterReference.gperf" + {"sect;", "§"}, + {""}, + {""}, +#line 145 "HTMLCharacterReference.gperf" + {"DownTee;", "⊤"}, + {""}, +#line 1914 "HTMLCharacterReference.gperf" + {"shcy;", "ш"}, + {""}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1628 "HTMLCharacterReference.gperf" + {"nvrtrie;", "⊵⃒"}, +#line 538 "HTMLCharacterReference.gperf" + {"SucceedsTilde;", "≿"}, + {""}, +#line 1897 "HTMLCharacterReference.gperf" + {"sdotb;", "⊡"}, +#line 1942 "HTMLCharacterReference.gperf" + {"softcy;", "ь"}, + {""}, + {""}, + {""}, +#line 35 "HTMLCharacterReference.gperf" + {"Amacr;", "Ā"}, +#line 533 "HTMLCharacterReference.gperf" + {"Subset;", "⋐"}, + {""}, + {""}, +#line 622 "HTMLCharacterReference.gperf" + {"Vopf;", "𝕍"}, +#line 1861 "HTMLCharacterReference.gperf" + {"rotimes;", "⨵"}, + {""}, + {""}, + {""}, + {""}, +#line 1811 "HTMLCharacterReference.gperf" + {"rbrksld;", "⦎"}, +#line 1997 "HTMLCharacterReference.gperf" + {"succeq;", "⪰"}, +#line 2018 "HTMLCharacterReference.gperf" + {"suplarr;", "⥻"}, +#line 442 "HTMLCharacterReference.gperf" + {"Poincareplane;", "ℌ"}, +#line 944 "HTMLCharacterReference.gperf" + {"dash;", "‐"}, +#line 1199 "HTMLCharacterReference.gperf" + {"iacute", "í"}, +#line 1200 "HTMLCharacterReference.gperf" + {"iacute;", "í"}, + {""}, + {""}, + {""}, +#line 945 "HTMLCharacterReference.gperf" + {"dashv;", "⊣"}, + {""}, + {""}, +#line 734 "HTMLCharacterReference.gperf" + {"bbrktbrk;", "⎶"}, + {""}, +#line 1983 "HTMLCharacterReference.gperf" + {"subne;", "⊊"}, +#line 882 "HTMLCharacterReference.gperf" + {"cirmid;", "⫯"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1759 "HTMLCharacterReference.gperf" + {"propto;", "∝"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1531 "HTMLCharacterReference.gperf" + {"nlE;", "≦̸"}, + {""}, +#line 933 "HTMLCharacterReference.gperf" + {"curvearrowright;", "↷"}, + {""}, + {""}, + {""}, + {""}, +#line 637 "HTMLCharacterReference.gperf" + {"Yacute", "Ý"}, +#line 638 "HTMLCharacterReference.gperf" + {"Yacute;", "Ý"}, + {""}, +#line 1608 "HTMLCharacterReference.gperf" + {"ntriangleleft;", "⋪"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1958 "HTMLCharacterReference.gperf" + {"sqsup;", "⊐"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 536 "HTMLCharacterReference.gperf" + {"SucceedsEqual;", "⪰"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 269 "HTMLCharacterReference.gperf" + {"Lacute;", "Ĺ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2234 "HTMLCharacterReference.gperf" + {"yopf;", "𝕪"}, + {""}, +#line 1160 "HTMLCharacterReference.gperf" + {"gtlPar;", "⦕"}, +#line 1917 "HTMLCharacterReference.gperf" + {"shy", "­"}, + {""}, + {""}, +#line 1263 "HTMLCharacterReference.gperf" + {"kappa;", "κ"}, + {""}, +#line 1867 "HTMLCharacterReference.gperf" + {"rscr;", "𝓇"}, +#line 1918 "HTMLCharacterReference.gperf" + {"shy;", "­"}, +#line 1327 "HTMLCharacterReference.gperf" + {"leftrightarrow;", "↔"}, +#line 1328 "HTMLCharacterReference.gperf" + {"leftrightarrows;", "⇆"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 666 "HTMLCharacterReference.gperf" + {"aelig", "æ"}, +#line 667 "HTMLCharacterReference.gperf" + {"aelig;", "æ"}, + {""}, + {""}, + {""}, +#line 1808 "HTMLCharacterReference.gperf" + {"rbrace;", "}"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1168 "HTMLCharacterReference.gperf" + {"gtrsim;", "≳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 977 "HTMLCharacterReference.gperf" + {"dollar;", "$"}, + {""}, +#line 1804 "HTMLCharacterReference.gperf" + {"ratio;", "∶"}, +#line 
1740 "HTMLCharacterReference.gperf" + {"pre;", "⪯"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 473 "HTMLCharacterReference.gperf" + {"ReverseUpEquilibrium;", "⥯"}, + {""}, +#line 1554 "HTMLCharacterReference.gperf" + {"notinvb;", "⋷"}, + {""}, +#line 222 "HTMLCharacterReference.gperf" + {"IEcy;", "Е"}, +#line 1982 "HTMLCharacterReference.gperf" + {"subnE;", "⫋"}, +#line 567 "HTMLCharacterReference.gperf" + {"Tstrok;", "Ŧ"}, + {""}, + {""}, +#line 1445 "HTMLCharacterReference.gperf" + {"micro", "µ"}, +#line 1446 "HTMLCharacterReference.gperf" + {"micro;", "µ"}, + {""}, + {""}, + {""}, +#line 1610 "HTMLCharacterReference.gperf" + {"ntriangleright;", "⋫"}, + {""}, +#line 1611 "HTMLCharacterReference.gperf" + {"ntrianglerighteq;", "⋭"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1891 "HTMLCharacterReference.gperf" + {"scnap;", "⪺"}, + {""}, +#line 1791 "HTMLCharacterReference.gperf" + {"rarr;", "→"}, + {""}, + {""}, + {""}, +#line 1448 "HTMLCharacterReference.gperf" + {"midast;", "*"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1456 "HTMLCharacterReference.gperf" + {"mlcp;", "⫛"}, + {""}, +#line 1195 "HTMLCharacterReference.gperf" + {"hslash;", "ℏ"}, + {""}, + {""}, + {""}, +#line 368 "HTMLCharacterReference.gperf" + {"NotLeftTriangle;", "⋪"}, + {""}, + {""}, +#line 369 "HTMLCharacterReference.gperf" + {"NotLeftTriangleBar;", "⧏̸"}, + {""}, +#line 370 "HTMLCharacterReference.gperf" + {"NotLeftTriangleEqual;", "⋬"}, +#line 1362 "HTMLCharacterReference.gperf" + {"llhard;", "⥫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 300 "HTMLCharacterReference.gperf" + {"LeftVectorBar;", "⥒"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1801 "HTMLCharacterReference.gperf" + {"rarrtl;", "↣"}, +#line 415 "HTMLCharacterReference.gperf" + {"Ograve", "Ò"}, +#line 416 "HTMLCharacterReference.gperf" + {"Ograve;", "Ò"}, +#line 1329 "HTMLCharacterReference.gperf" + {"leftrightharpoons;", "⇋"}, + {""}, +#line 1709 "HTMLCharacterReference.gperf" + {"pfr;", "𝔭"}, +#line 877 "HTMLCharacterReference.gperf" + {"circledast;", "⊛"}, +#line 1616 "HTMLCharacterReference.gperf" + {"nvDash;", "⊭"}, + {""}, + {""}, +#line 1460 "HTMLCharacterReference.gperf" + {"mopf;", "𝕞"}, + {""}, +#line 328 "HTMLCharacterReference.gperf" + {"MediumSpace;", " "}, +#line 1691 "HTMLCharacterReference.gperf" + {"otimes;", "⊗"}, + {""}, +#line 1527 "HTMLCharacterReference.gperf" + {"nisd;", "⋺"}, + {""}, + {""}, +#line 1984 "HTMLCharacterReference.gperf" + {"subplus;", "⪿"}, +#line 1323 "HTMLCharacterReference.gperf" + {"leftarrowtail;", "↢"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1807 "HTMLCharacterReference.gperf" + {"rbbrk;", "❳"}, +#line 264 "HTMLCharacterReference.gperf" + {"Kopf;", "𝕂"}, + {""}, + {""}, + {""}, + {""}, +#line 1813 "HTMLCharacterReference.gperf" + {"rcaron;", "ř"}, +#line 171 "HTMLCharacterReference.gperf" + {"EqualTilde;", "≂"}, + {""}, +#line 739 "HTMLCharacterReference.gperf" + {"because;", "∵"}, + {""}, + {""}, +#line 1609 "HTMLCharacterReference.gperf" + {"ntrianglelefteq;", "⋬"}, + {""}, + {""}, + {""}, + {""}, +#line 2136 "HTMLCharacterReference.gperf" + {"urcrop;", "⌎"}, +#line 1911 "HTMLCharacterReference.gperf" + {"sfrown;", "⌢"}, + {""}, + {""}, + {""}, +#line 1166 "HTMLCharacterReference.gperf" + {"gtreqqless;", "⪌"}, + {""}, + {""}, + {""}, + {""}, 
+ {""}, +#line 381 "HTMLCharacterReference.gperf" + {"NotPrecedesSlantEqual;", "⋠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2015 "HTMLCharacterReference.gperf" + {"supedot;", "⫄"}, + {""}, +#line 1799 "HTMLCharacterReference.gperf" + {"rarrpl;", "⥅"}, +#line 2134 "HTMLCharacterReference.gperf" + {"urcorn;", "⌝"}, + {""}, + {""}, + {""}, +#line 2233 "HTMLCharacterReference.gperf" + {"yicy;", "ї"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1646 "HTMLCharacterReference.gperf" + {"odot;", "⊙"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2065 "HTMLCharacterReference.gperf" + {"tint;", "∭"}, +#line 1741 "HTMLCharacterReference.gperf" + {"prec;", "≺"}, + {""}, + {""}, + {""}, +#line 2106 "HTMLCharacterReference.gperf" + {"udarr;", "⇅"}, + {""}, +#line 1578 "HTMLCharacterReference.gperf" + {"nsccue;", "⋡"}, +#line 1297 "HTMLCharacterReference.gperf" + {"larrsim;", "⥳"}, + {""}, +#line 1798 "HTMLCharacterReference.gperf" + {"rarrlp;", "↬"}, + {""}, + {""}, +#line 926 "HTMLCharacterReference.gperf" + {"curlyeqprec;", "⋞"}, + {""}, + {""}, + {""}, +#line 508 "HTMLCharacterReference.gperf" + {"Sacute;", "Ś"}, +#line 1761 "HTMLCharacterReference.gperf" + {"prurel;", "⊰"}, +#line 2139 "HTMLCharacterReference.gperf" + {"uscr;", "𝓊"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 348 "HTMLCharacterReference.gperf" + {"NoBreak;", "⁠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1544 "HTMLCharacterReference.gperf" + {"nltri;", "⋪"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1960 "HTMLCharacterReference.gperf" + {"sqsupset;", "⊐"}, +#line 1692 "HTMLCharacterReference.gperf" + {"otimesas;", "⨶"}, +#line 1961 "HTMLCharacterReference.gperf" + {"sqsupseteq;", "⊒"}, +#line 1713 "HTMLCharacterReference.gperf" + {"phone;", "☎"}, + {""}, +#line 1857 "HTMLCharacterReference.gperf" + {"robrk;", "⟧"}, + {""}, +#line 1638 "HTMLCharacterReference.gperf" + {"oast;", "⊛"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1752 "HTMLCharacterReference.gperf" + {"prnap;", "⪹"}, + {""}, + {""}, + {""}, +#line 225 "HTMLCharacterReference.gperf" + {"Iacute", "Í"}, +#line 226 "HTMLCharacterReference.gperf" + {"Iacute;", "Í"}, + {""}, + {""}, +#line 302 "HTMLCharacterReference.gperf" + {"Leftrightarrow;", "⇔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 749 "HTMLCharacterReference.gperf" + {"bigcup;", "⋃"}, + {""}, +#line 1130 "HTMLCharacterReference.gperf" + {"gesdotol;", "⪄"}, + {""}, +#line 103 "HTMLCharacterReference.gperf" + {"Dashv;", "⫤"}, + {""}, +#line 2247 "HTMLCharacterReference.gperf" + {"zigrarr;", "⇝"}, + {""}, +#line 1624 "HTMLCharacterReference.gperf" + {"nvle;", "≤⃒"}, +#line 2074 "HTMLCharacterReference.gperf" + {"trade;", "™"}, +#line 1479 "HTMLCharacterReference.gperf" + {"nacute;", "ń"}, + {""}, +#line 698 "HTMLCharacterReference.gperf" + {"angrtvbd;", "⦝"}, + {""}, + {""}, +#line 726 "HTMLCharacterReference.gperf" + {"backepsilon;", "϶"}, + {""}, +#line 125 "HTMLCharacterReference.gperf" + {"DoubleLeftTee;", "⫤"}, + {""}, + {""}, + {""}, + {""}, +#line 2100 "HTMLCharacterReference.gperf" + {"uarr;", "↑"}, + {""}, +#line 1685 "HTMLCharacterReference.gperf" + {"oscr;", "ℴ"}, + {""}, +#line 927 "HTMLCharacterReference.gperf" + {"curlyeqsucc;", "⋟"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1795 "HTMLCharacterReference.gperf" + {"rarrc;", "⤳"}, +#line 373 "HTMLCharacterReference.gperf" + {"NotLessGreater;", "≸"}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 879 "HTMLCharacterReference.gperf" + {"circleddash;", "⊝"}, + {""}, + {""}, + {""}, + {""}, +#line 646 "HTMLCharacterReference.gperf" + {"Zacute;", "Ź"}, + {""}, + {""}, + {""}, +#line 956 "HTMLCharacterReference.gperf" + {"delta;", "δ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 296 "HTMLCharacterReference.gperf" + {"LeftUpTeeVector;", "⥠"}, + {""}, + {""}, + {""}, + {""}, +#line 1954 "HTMLCharacterReference.gperf" + {"sqsub;", "⊏"}, + {""}, + {""}, + {""}, + {""}, +#line 298 "HTMLCharacterReference.gperf" + {"LeftUpVectorBar;", "⥘"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 320 "HTMLCharacterReference.gperf" + {"LowerLeftArrow;", "↙"}, + {""}, + {""}, + {""}, +#line 1999 "HTMLCharacterReference.gperf" + {"succneqq;", "⪶"}, +#line 587 "HTMLCharacterReference.gperf" + {"UnionPlus;", "⊎"}, + {""}, + {""}, +#line 1827 "HTMLCharacterReference.gperf" + {"rect;", "▭"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 121 "HTMLCharacterReference.gperf" + {"DoubleDot;", "¨"}, +#line 1224 "HTMLCharacterReference.gperf" + {"imped;", "Ƶ"}, + {""}, +#line 1177 "HTMLCharacterReference.gperf" + {"harrcir;", "⥈"}, + {""}, + {""}, + {""}, + {""}, +#line 1764 "HTMLCharacterReference.gperf" + {"puncsp;", " "}, + {""}, +#line 56 "HTMLCharacterReference.gperf" + {"Bopf;", "𝔹"}, + {""}, + {""}, +#line 951 "HTMLCharacterReference.gperf" + {"ddagger;", "‡"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 266 "HTMLCharacterReference.gperf" + {"LJcy;", "Љ"}, + {""}, + {""}, + {""}, + {""}, +#line 89 "HTMLCharacterReference.gperf" + {"Copf;", "ℂ"}, +#line 1080 "HTMLCharacterReference.gperf" + {"fjlig;", "fj"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1186 "HTMLCharacterReference.gperf" + {"hksearow;", "⤥"}, +#line 353 "HTMLCharacterReference.gperf" + {"NotCupCap;", "≭"}, + {""}, +#line 463 "HTMLCharacterReference.gperf" + {"Racute;", "Ŕ"}, + {""}, + {""}, + {""}, +#line 1707 "HTMLCharacterReference.gperf" + {"perp;", "⊥"}, + {""}, + {""}, +#line 1870 "HTMLCharacterReference.gperf" + {"rsquo;", "’"}, +#line 1871 "HTMLCharacterReference.gperf" + {"rsquor;", "’"}, + {""}, + {""}, + {""}, + {""}, +#line 287 "HTMLCharacterReference.gperf" + {"LeftRightArrow;", "↔"}, +#line 153 "HTMLCharacterReference.gperf" + {"Eacute", "É"}, +#line 154 "HTMLCharacterReference.gperf" + {"Eacute;", "É"}, + {""}, + {""}, + {""}, + {""}, +#line 1675 "HTMLCharacterReference.gperf" + {"order;", "ℴ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1979 "HTMLCharacterReference.gperf" + {"sube;", "⊆"}, + {""}, +#line 1434 "HTMLCharacterReference.gperf" + {"mapsto;", "↦"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2135 "HTMLCharacterReference.gperf" + {"urcorner;", "⌝"}, +#line 1266 "HTMLCharacterReference.gperf" + {"kcy;", "к"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1043 "HTMLCharacterReference.gperf" + {"eplus;", "⩱"}, + {""}, + {""}, +#line 1922 "HTMLCharacterReference.gperf" + {"sim;", "∼"}, + {""}, +#line 741 "HTMLCharacterReference.gperf" + {"bepsi;", "϶"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2069 "HTMLCharacterReference.gperf" + {"topcir;", "⫱"}, + {""}, +#line 1378 "HTMLCharacterReference.gperf" + {"longleftrightarrow;", "⟷"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 701 "HTMLCharacterReference.gperf" + {"angzarr;", "⍼"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 946 "HTMLCharacterReference.gperf" + 
{"dbkarow;", "⤏"}, +#line 1815 "HTMLCharacterReference.gperf" + {"rceil;", "⌉"}, + {""}, + {""}, +#line 1476 "HTMLCharacterReference.gperf" + {"nVDash;", "⊯"}, + {""}, + {""}, + {""}, +#line 1972 "HTMLCharacterReference.gperf" + {"starf;", "★"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2147 "HTMLCharacterReference.gperf" + {"uwangle;", "⦧"}, + {""}, + {""}, + {""}, +#line 784 "HTMLCharacterReference.gperf" + {"boxHD;", "╦"}, + {""}, +#line 1744 "HTMLCharacterReference.gperf" + {"preceq;", "⪯"}, + {""}, +#line 1928 "HTMLCharacterReference.gperf" + {"siml;", "⪝"}, + {""}, +#line 1649 "HTMLCharacterReference.gperf" + {"ofcir;", "⦿"}, + {""}, +#line 447 "HTMLCharacterReference.gperf" + {"PrecedesSlantEqual;", "≼"}, +#line 81 "HTMLCharacterReference.gperf" + {"ClockwiseContourIntegral;", "∲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 97 "HTMLCharacterReference.gperf" + {"DDotrahd;", "⤑"}, +#line 1482 "HTMLCharacterReference.gperf" + {"napE;", "⩰̸"}, +#line 1217 "HTMLCharacterReference.gperf" + {"ijlig;", "ij"}, + {""}, + {""}, + {""}, +#line 1581 "HTMLCharacterReference.gperf" + {"nshortmid;", "∤"}, + {""}, +#line 878 "HTMLCharacterReference.gperf" + {"circledcirc;", "⊚"}, + {""}, + {""}, + {""}, +#line 806 "HTMLCharacterReference.gperf" + {"boxhU;", "╨"}, + {""}, + {""}, + {""}, +#line 98 "HTMLCharacterReference.gperf" + {"DJcy;", "Ђ"}, +#line 122 "HTMLCharacterReference.gperf" + {"DoubleDownArrow;", "⇓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2153 "HTMLCharacterReference.gperf" + {"varepsilon;", "ϵ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2168 "HTMLCharacterReference.gperf" + {"vartriangleright;", "⊳"}, + {""}, +#line 1793 "HTMLCharacterReference.gperf" + {"rarrb;", "⇥"}, + {""}, +#line 1933 "HTMLCharacterReference.gperf" + {"slarr;", "←"}, + {""}, +#line 1055 "HTMLCharacterReference.gperf" + {"equivDD;", "⩸"}, + {""}, +#line 2115 "HTMLCharacterReference.gperf" + {"uhblk;", "▀"}, +#line 2102 "HTMLCharacterReference.gperf" + {"ubreve;", "ŭ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1647 "HTMLCharacterReference.gperf" + {"odsold;", "⦼"}, + {""}, + {""}, + {""}, +#line 336 "HTMLCharacterReference.gperf" + {"Nacute;", "Ń"}, + {""}, +#line 1915 "HTMLCharacterReference.gperf" + {"shortmid;", "∣"}, + {""}, + {""}, + {""}, + {""}, +#line 1141 "HTMLCharacterReference.gperf" + {"glj;", "⪤"}, +#line 204 "HTMLCharacterReference.gperf" + {"GreaterGreater;", "⪢"}, +#line 547 "HTMLCharacterReference.gperf" + {"TRADE;", "™"}, +#line 1959 "HTMLCharacterReference.gperf" + {"sqsupe;", "⊒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2111 "HTMLCharacterReference.gperf" + {"ugrave", "ù"}, +#line 2112 "HTMLCharacterReference.gperf" + {"ugrave;", "ù"}, + {""}, + {""}, + {""}, + {""}, +#line 2055 "HTMLCharacterReference.gperf" + {"thkap;", "≈"}, + {""}, + {""}, + {""}, +#line 1540 "HTMLCharacterReference.gperf" + {"nles;", "⩽̸"}, +#line 427 "HTMLCharacterReference.gperf" + {"Otilde", "Õ"}, +#line 428 "HTMLCharacterReference.gperf" + {"Otilde;", "Õ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1705 "HTMLCharacterReference.gperf" + {"period;", "."}, + {""}, +#line 2133 "HTMLCharacterReference.gperf" + {"upuparrows;", 
"⇈"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 25 "HTMLCharacterReference.gperf" + {"Aacute", "Á"}, +#line 26 "HTMLCharacterReference.gperf" + {"Aacute;", "Á"}, + {""}, +#line 1956 "HTMLCharacterReference.gperf" + {"sqsubset;", "⊏"}, +#line 2003 "HTMLCharacterReference.gperf" + {"sung;", "♪"}, +#line 1957 "HTMLCharacterReference.gperf" + {"sqsubseteq;", "⊑"}, + {""}, + {""}, + {""}, +#line 1330 "HTMLCharacterReference.gperf" + {"leftrightsquigarrow;", "↭"}, +#line 1889 "HTMLCharacterReference.gperf" + {"scirc;", "ŝ"}, +#line 929 "HTMLCharacterReference.gperf" + {"curlywedge;", "⋏"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1365 "HTMLCharacterReference.gperf" + {"lmoust;", "⎰"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1437 "HTMLCharacterReference.gperf" + {"mapstoup;", "↥"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1763 "HTMLCharacterReference.gperf" + {"psi;", "ψ"}, +#line 84 "HTMLCharacterReference.gperf" + {"Colon;", "∷"}, +#line 303 "HTMLCharacterReference.gperf" + {"LessEqualGreater;", "⋚"}, + {""}, + {""}, + {""}, + {""}, +#line 1652 "HTMLCharacterReference.gperf" + {"ograve", "ò"}, +#line 1653 "HTMLCharacterReference.gperf" + {"ograve;", "ò"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 365 "HTMLCharacterReference.gperf" + {"NotGreaterTilde;", "≵"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 778 "HTMLCharacterReference.gperf" + {"bowtie;", "⋈"}, + {""}, +#line 1783 "HTMLCharacterReference.gperf" + {"radic;", "√"}, + {""}, +#line 107 "HTMLCharacterReference.gperf" + {"Delta;", "Δ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1275 "HTMLCharacterReference.gperf" + {"lAtail;", "⤛"}, + {""}, + {""}, +#line 2137 "HTMLCharacterReference.gperf" + {"uring;", "ů"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1947 "HTMLCharacterReference.gperf" + {"spades;", "♠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1930 "HTMLCharacterReference.gperf" + {"simne;", "≆"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 317 "HTMLCharacterReference.gperf" + {"Longleftrightarrow;", "⟺"}, + {""}, + {""}, +#line 1075 "HTMLCharacterReference.gperf" + {"ffilig;", "ffi"}, + {""}, + {""}, + {""}, + {""}, +#line 2052 "HTMLCharacterReference.gperf" + {"thickapprox;", "≈"}, + {""}, +#line 2043 "HTMLCharacterReference.gperf" + {"tcy;", "т"}, + {""}, +#line 293 "HTMLCharacterReference.gperf" + {"LeftTriangleBar;", "⧏"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2012 "HTMLCharacterReference.gperf" + {"supdot;", "⪾"}, + {""}, + {""}, +#line 1710 "HTMLCharacterReference.gperf" + {"phi;", "φ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 396 "HTMLCharacterReference.gperf" + {"NotSuperset;", "⊃⃒"}, + {""}, + {""}, + {""}, + {""}, +#line 1749 "HTMLCharacterReference.gperf" + {"prime;", "′"}, +#line 342 "HTMLCharacterReference.gperf" + {"NegativeThinSpace;", "​"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1452 "HTMLCharacterReference.gperf" + {"minus;", "−"}, + {""}, + {""}, + {""}, + {""}, +#line 1995 "HTMLCharacterReference.gperf" + {"succapprox;", "⪸"}, + {""}, + {""}, +#line 753 "HTMLCharacterReference.gperf" + {"bigsqcup;", "⨆"}, + {""}, + {""}, + {""}, +#line 1704 "HTMLCharacterReference.gperf" + {"percnt;", "%"}, + {""}, + {""}, +#line 1929 
"HTMLCharacterReference.gperf" + {"simlE;", "⪟"}, + {""}, + {""}, + {""}, + {""}, +#line 1069 "HTMLCharacterReference.gperf" + {"exist;", "∃"}, + {""}, + {""}, + {""}, +#line 74 "HTMLCharacterReference.gperf" + {"CenterDot;", "·"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 548 "HTMLCharacterReference.gperf" + {"TSHcy;", "Ћ"}, + {""}, + {""}, + {""}, +#line 1541 "HTMLCharacterReference.gperf" + {"nless;", "≮"}, + {""}, +#line 1750 "HTMLCharacterReference.gperf" + {"primes;", "ℙ"}, + {""}, + {""}, + {""}, +#line 1825 "HTMLCharacterReference.gperf" + {"realpart;", "ℜ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 66 "HTMLCharacterReference.gperf" + {"Cayleys;", "ℭ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1734 "HTMLCharacterReference.gperf" + {"pound", "£"}, +#line 1735 "HTMLCharacterReference.gperf" + {"pound;", "£"}, + {""}, + {""}, +#line 188 "HTMLCharacterReference.gperf" + {"GJcy;", "Ѓ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1655 "HTMLCharacterReference.gperf" + {"ohbar;", "⦵"}, + {""}, +#line 1027 "HTMLCharacterReference.gperf" + {"ell;", "ℓ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1545 "HTMLCharacterReference.gperf" + {"nltrie;", "⋬"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 410 "HTMLCharacterReference.gperf" + {"Ocirc", "Ô"}, +#line 411 "HTMLCharacterReference.gperf" + {"Ocirc;", "Ô"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 306 "HTMLCharacterReference.gperf" + {"LessLess;", "⪡"}, + {""}, +#line 673 "HTMLCharacterReference.gperf" + {"aleph;", "ℵ"}, +#line 1695 "HTMLCharacterReference.gperf" + {"ovbar;", "⌽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 452 "HTMLCharacterReference.gperf" + {"Proportional;", "∝"}, + {""}, + {""}, +#line 873 "HTMLCharacterReference.gperf" + {"circlearrowleft;", "↺"}, +#line 2239 "HTMLCharacterReference.gperf" + {"zacute;", "ź"}, +#line 1816 "HTMLCharacterReference.gperf" + {"rcub;", "}"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 471 "HTMLCharacterReference.gperf" + {"ReverseElement;", "∋"}, + {""}, + {""}, +#line 731 "HTMLCharacterReference.gperf" + {"barwed;", "⌅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 343 "HTMLCharacterReference.gperf" + {"NegativeVeryThinSpace;", "​"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1931 "HTMLCharacterReference.gperf" + {"simplus;", "⨤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1746 "HTMLCharacterReference.gperf" + {"precneqq;", "⪵"}, + {""}, + {""}, + {""}, + {""}, +#line 870 "HTMLCharacterReference.gperf" + {"cirE;", "⧃"}, + {""}, + {""}, + {""}, + {""}, +#line 1561 "HTMLCharacterReference.gperf" + {"nparallel;", "∦"}, + {""}, +#line 1851 "HTMLCharacterReference.gperf" + {"rlm;", "‏"}, +#line 71 "HTMLCharacterReference.gperf" + {"Cconint;", "∰"}, + {""}, + {""}, + {""}, + {""}, +#line 1557 "HTMLCharacterReference.gperf" + {"notniva;", "∌"}, + {""}, +#line 1271 "HTMLCharacterReference.gperf" + {"kopf;", "𝕜"}, + {""}, +#line 1955 "HTMLCharacterReference.gperf" + {"sqsube;", "⊑"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 290 "HTMLCharacterReference.gperf" + {"LeftTeeArrow;", "↤"}, + {""}, +#line 335 "HTMLCharacterReference.gperf" + {"NJcy;", "Њ"}, + {""}, + {""}, + {""}, +#line 745 "HTMLCharacterReference.gperf" + {"between;", 
"≬"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1762 "HTMLCharacterReference.gperf" + {"pscr;", "𝓅"}, + {""}, + {""}, + {""}, +#line 130 "HTMLCharacterReference.gperf" + {"DoubleRightTee;", "⊨"}, + {""}, + {""}, +#line 590 "HTMLCharacterReference.gperf" + {"UpArrow;", "↑"}, + {""}, +#line 565 "HTMLCharacterReference.gperf" + {"TripleDot;", "⃛"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1794 "HTMLCharacterReference.gperf" + {"rarrbfs;", "⤠"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1868 "HTMLCharacterReference.gperf" + {"rsh;", "↱"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1530 "HTMLCharacterReference.gperf" + {"nlArr;", "⇍"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1876 "HTMLCharacterReference.gperf" + {"rtrif;", "▸"}, + {""}, + {""}, +#line 1454 "HTMLCharacterReference.gperf" + {"minusd;", "∸"}, + {""}, + {""}, + {""}, +#line 355 "HTMLCharacterReference.gperf" + {"NotElement;", "∉"}, + {""}, + {""}, +#line 1849 "HTMLCharacterReference.gperf" + {"rlarr;", "⇄"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1696 "HTMLCharacterReference.gperf" + {"par;", "∥"}, +#line 1702 "HTMLCharacterReference.gperf" + {"part;", "∂"}, + {""}, + {""}, + {""}, + {""}, +#line 1952 "HTMLCharacterReference.gperf" + {"sqcup;", "⊔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1435 "HTMLCharacterReference.gperf" + {"mapstodown;", "↧"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2225 "HTMLCharacterReference.gperf" + {"yacute", "ý"}, +#line 2226 "HTMLCharacterReference.gperf" + {"yacute;", "ý"}, + {""}, + {""}, + {""}, +#line 1470 "HTMLCharacterReference.gperf" + {"nLeftarrow;", "⇍"}, + {""}, + {""}, + {""}, +#line 2060 "HTMLCharacterReference.gperf" + {"times", "×"}, +#line 2061 "HTMLCharacterReference.gperf" + {"times;", "×"}, +#line 2082 "HTMLCharacterReference.gperf" + {"tridot;", "◬"}, + {""}, +#line 620 "HTMLCharacterReference.gperf" + {"VeryThinSpace;", " "}, +#line 39 "HTMLCharacterReference.gperf" + {"ApplyFunction;", "⁡"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2141 "HTMLCharacterReference.gperf" + {"utilde;", "ũ"}, + {""}, +#line 2034 "HTMLCharacterReference.gperf" + {"swarrow;", "↙"}, + {""}, + {""}, + {""}, +#line 1953 "HTMLCharacterReference.gperf" + {"sqcups;", "⊔︀"}, +#line 2019 "HTMLCharacterReference.gperf" + {"supmult;", "⫂"}, +#line 1895 "HTMLCharacterReference.gperf" + {"scy;", "с"}, +#line 985 "HTMLCharacterReference.gperf" + {"doublebarwedge;", "⌆"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1311 "HTMLCharacterReference.gperf" + {"lcedil;", "ļ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 401 "HTMLCharacterReference.gperf" + {"NotTildeTilde;", "≉"}, + {""}, +#line 852 "HTMLCharacterReference.gperf" + {"ccedil", "ç"}, +#line 853 "HTMLCharacterReference.gperf" + {"ccedil;", "ç"}, +#line 1985 "HTMLCharacterReference.gperf" + {"subrarr;", "⥹"}, + {""}, + {""}, +#line 1879 "HTMLCharacterReference.gperf" + {"rx;", "℞"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 172 "HTMLCharacterReference.gperf" + {"Equilibrium;", "⇌"}, + {""}, + {""}, + {""}, +#line 2151 "HTMLCharacterReference.gperf" + {"vDash;", "⊨"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2023 "HTMLCharacterReference.gperf" + {"supset;", "⊃"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2209 "HTMLCharacterReference.gperf" + {"xlArr;", "⟸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1689 "HTMLCharacterReference.gperf" + {"otilde", "õ"}, +#line 1690 "HTMLCharacterReference.gperf" + {"otilde;", "õ"}, + {""}, +#line 1792 "HTMLCharacterReference.gperf" + {"rarrap;", "⥵"}, +#line 344 "HTMLCharacterReference.gperf" + {"NestedGreaterGreater;", "≫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1639 "HTMLCharacterReference.gperf" + {"ocir;", "⊚"}, + {""}, + {""}, +#line 399 "HTMLCharacterReference.gperf" + {"NotTildeEqual;", "≄"}, + {""}, + {""}, + {""}, +#line 1589 "HTMLCharacterReference.gperf" + {"nsqsupe;", "⋣"}, + {""}, +#line 987 "HTMLCharacterReference.gperf" + {"downdownarrows;", "⇊"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 321 "HTMLCharacterReference.gperf" + {"LowerRightArrow;", "↘"}, + {""}, +#line 1818 "HTMLCharacterReference.gperf" + {"rdca;", "⤷"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2084 "HTMLCharacterReference.gperf" + {"triminus;", "⨺"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1004 "HTMLCharacterReference.gperf" + {"dzigrarr;", "⟿"}, +#line 1924 "HTMLCharacterReference.gperf" + {"sime;", "≃"}, +#line 1925 "HTMLCharacterReference.gperf" + {"simeq;", "≃"}, + {""}, + {""}, + {""}, +#line 307 "HTMLCharacterReference.gperf" + {"LessSlantEqual;", "⩽"}, +#line 2155 "HTMLCharacterReference.gperf" + {"varnothing;", "∅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2138 "HTMLCharacterReference.gperf" + {"urtri;", "◹"}, +#line 1471 "HTMLCharacterReference.gperf" + {"nLeftrightarrow;", "⇎"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2143 "HTMLCharacterReference.gperf" + {"utrif;", "▴"}, + {""}, + {""}, + {""}, + {""}, +#line 2070 "HTMLCharacterReference.gperf" + {"topf;", "𝕥"}, + {""}, + {""}, + {""}, +#line 1325 "HTMLCharacterReference.gperf" + {"leftharpoonup;", "↼"}, + {""}, +#line 1219 "HTMLCharacterReference.gperf" + {"image;", "ℑ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1697 "HTMLCharacterReference.gperf" + {"para", "¶"}, +#line 1698 "HTMLCharacterReference.gperf" + {"para;", "¶"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 85 "HTMLCharacterReference.gperf" + {"Colone;", "⩴"}, + {""}, + {""}, +#line 1662 "HTMLCharacterReference.gperf" + {"olt;", "⧀"}, + {""}, + {""}, +#line 1712 "HTMLCharacterReference.gperf" + {"phmmat;", "ℳ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1998 "HTMLCharacterReference.gperf" + {"succnapprox;", "⪺"}, + {""}, + {""}, +#line 1331 "HTMLCharacterReference.gperf" + {"leftthreetimes;", "⋋"}, + {""}, + {""}, + {""}, +#line 756 "HTMLCharacterReference.gperf" + {"bigtriangleup;", "△"}, +#line 140 "HTMLCharacterReference.gperf" + {"DownLeftVector;", "↽"}, + {""}, + {""}, +#line 141 "HTMLCharacterReference.gperf" + {"DownLeftVectorBar;", "⥖"}, +#line 1907 "HTMLCharacterReference.gperf" + {"setminus;", "∖"}, +#line 1380 "HTMLCharacterReference.gperf" + {"longrightarrow;", "⟶"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2031 "HTMLCharacterReference.gperf" + {"swArr;", "⇙"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2064 
"HTMLCharacterReference.gperf" + {"timesd;", "⨰"}, + {""}, + {""}, + {""}, +#line 808 "HTMLCharacterReference.gperf" + {"boxhu;", "┴"}, + {""}, + {""}, +#line 412 "HTMLCharacterReference.gperf" + {"Ocy;", "О"}, + {""}, +#line 1711 "HTMLCharacterReference.gperf" + {"phiv;", "ϕ"}, + {""}, +#line 728 "HTMLCharacterReference.gperf" + {"backsim;", "∽"}, + {""}, + {""}, + {""}, +#line 1658 "HTMLCharacterReference.gperf" + {"olarr;", "↺"}, +#line 220 "HTMLCharacterReference.gperf" + {"HumpDownHump;", "≎"}, +#line 537 "HTMLCharacterReference.gperf" + {"SucceedsSlantEqual;", "≽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2126 "HTMLCharacterReference.gperf" + {"updownarrow;", "↕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2103 "HTMLCharacterReference.gperf" + {"ucirc", "û"}, +#line 2104 "HTMLCharacterReference.gperf" + {"ucirc;", "û"}, +#line 1657 "HTMLCharacterReference.gperf" + {"oint;", "∮"}, + {""}, + {""}, +#line 1676 "HTMLCharacterReference.gperf" + {"orderof;", "ℴ"}, + {""}, + {""}, + {""}, + {""}, +#line 1834 "HTMLCharacterReference.gperf" + {"rharu;", "⇀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1742 "HTMLCharacterReference.gperf" + {"precapprox;", "⪷"}, + {""}, + {""}, +#line 1431 "HTMLCharacterReference.gperf" + {"malt;", "✠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1835 "HTMLCharacterReference.gperf" + {"rharul;", "⥬"}, + {""}, +#line 275 "HTMLCharacterReference.gperf" + {"Lcedil;", "Ļ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 752 "HTMLCharacterReference.gperf" + {"bigotimes;", "⨂"}, + {""}, + {""}, + {""}, +#line 1820 "HTMLCharacterReference.gperf" + {"rdquo;", "”"}, +#line 1821 "HTMLCharacterReference.gperf" + {"rdquor;", "”"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 313 "HTMLCharacterReference.gperf" + {"LongLeftArrow;", "⟵"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1860 "HTMLCharacterReference.gperf" + {"roplus;", "⨮"}, + {""}, + {""}, +#line 1980 "HTMLCharacterReference.gperf" + {"subedot;", "⫃"}, + {""}, + {""}, + {""}, + {""}, +#line 748 "HTMLCharacterReference.gperf" + {"bigcirc;", "◯"}, + {""}, +#line 1607 "HTMLCharacterReference.gperf" + {"ntlg;", "≸"}, +#line 1640 "HTMLCharacterReference.gperf" + {"ocirc", "ô"}, +#line 1641 "HTMLCharacterReference.gperf" + {"ocirc;", "ô"}, + {""}, +#line 2024 "HTMLCharacterReference.gperf" + {"supseteq;", "⊇"}, +#line 2025 "HTMLCharacterReference.gperf" + {"supseteqq;", "⫆"}, + {""}, +#line 1183 "HTMLCharacterReference.gperf" + {"hellip;", "…"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1623 "HTMLCharacterReference.gperf" + {"nvlArr;", "⤂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1877 "HTMLCharacterReference.gperf" + {"rtriltri;", "⧎"}, +#line 1789 "HTMLCharacterReference.gperf" + {"raquo", "»"}, +#line 1790 "HTMLCharacterReference.gperf" + {"raquo;", "»"}, + {""}, + {""}, +#line 63 "HTMLCharacterReference.gperf" + {"Cacute;", "Ć"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1112 "HTMLCharacterReference.gperf" + {"gEl;", "⪌"}, + {""}, + {""}, +#line 1262 "HTMLCharacterReference.gperf" + {"jukcy;", "є"}, + {""}, + {""}, + {""}, + {""}, +#line 2174 
"HTMLCharacterReference.gperf" + {"vellip;", "⋮"}, + {""}, +#line 1883 "HTMLCharacterReference.gperf" + {"scE;", "⪴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1455 "HTMLCharacterReference.gperf" + {"minusdu;", "⨪"}, + {""}, + {""}, + {""}, +#line 942 "HTMLCharacterReference.gperf" + {"daleth;", "ℸ"}, + {""}, +#line 119 "HTMLCharacterReference.gperf" + {"DotEqual;", "≐"}, +#line 1866 "HTMLCharacterReference.gperf" + {"rsaquo;", "›"}, +#line 128 "HTMLCharacterReference.gperf" + {"DoubleLongRightArrow;", "⟹"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 137 "HTMLCharacterReference.gperf" + {"DownBreve;", "̑"}, + {""}, +#line 2092 "HTMLCharacterReference.gperf" + {"tstrok;", "ŧ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2087 "HTMLCharacterReference.gperf" + {"tritime;", "⨻"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 729 "HTMLCharacterReference.gperf" + {"backsimeq;", "⋍"}, + {""}, + {""}, +#line 1902 "HTMLCharacterReference.gperf" + {"searrow;", "↘"}, + {""}, + {""}, +#line 1645 "HTMLCharacterReference.gperf" + {"odiv;", "⨸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2086 "HTMLCharacterReference.gperf" + {"trisb;", "⧍"}, +#line 178 "HTMLCharacterReference.gperf" + {"Exists;", "∃"}, +#line 282 "HTMLCharacterReference.gperf" + {"LeftDoubleBracket;", "⟦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1802 "HTMLCharacterReference.gperf" + {"rarrw;", "↝"}, +#line 732 "HTMLCharacterReference.gperf" + {"barwedge;", "⌅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1817 "HTMLCharacterReference.gperf" + {"rcy;", "р"}, + {""}, + {""}, +#line 318 "HTMLCharacterReference.gperf" + {"Longrightarrow;", "⟹"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1946 "HTMLCharacterReference.gperf" + {"sopf;", "𝕤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 511 "HTMLCharacterReference.gperf" + {"Scedil;", "Ş"}, + {""}, + {""}, + {""}, + {""}, +#line 295 "HTMLCharacterReference.gperf" + {"LeftUpDownVector;", "⥑"}, + {""}, + {""}, +#line 259 "HTMLCharacterReference.gperf" + {"KJcy;", "Ќ"}, + {""}, +#line 380 "HTMLCharacterReference.gperf" + {"NotPrecedesEqual;", "⪯̸"}, + {""}, +#line 2166 "HTMLCharacterReference.gperf" + {"vartheta;", "ϑ"}, + {""}, + {""}, + {""}, + {""}, +#line 539 "HTMLCharacterReference.gperf" + {"SuchThat;", "∋"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 773 "HTMLCharacterReference.gperf" + {"bnequiv;", "≡⃥"}, + {""}, + {""}, + {""}, +#line 1252 "HTMLCharacterReference.gperf" + {"iukcy;", "і"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1965 "HTMLCharacterReference.gperf" + {"squf;", "▪"}, + {""}, +#line 1495 "HTMLCharacterReference.gperf" + {"ncedil;", "ņ"}, + {""}, + {""}, +#line 1737 "HTMLCharacterReference.gperf" + {"prE;", "⪳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 136 "HTMLCharacterReference.gperf" + 
{"DownArrowUpArrow;", "⇵"}, + {""}, + {""}, + {""}, +#line 1919 "HTMLCharacterReference.gperf" + {"sigma;", "σ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1079 "HTMLCharacterReference.gperf" + {"filig;", "fi"}, + {""}, + {""}, + {""}, +#line 315 "HTMLCharacterReference.gperf" + {"LongRightArrow;", "⟶"}, + {""}, +#line 1449 "HTMLCharacterReference.gperf" + {"midcir;", "⫰"}, + {""}, + {""}, +#line 1872 "HTMLCharacterReference.gperf" + {"rthree;", "⋌"}, + {""}, +#line 2038 "HTMLCharacterReference.gperf" + {"target;", "⌖"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1424 "HTMLCharacterReference.gperf" + {"luruhar;", "⥦"}, + {""}, + {""}, +#line 1082 "HTMLCharacterReference.gperf" + {"fllig;", "fl"}, + {""}, + {""}, + {""}, + {""}, +#line 1450 "HTMLCharacterReference.gperf" + {"middot", "·"}, +#line 1451 "HTMLCharacterReference.gperf" + {"middot;", "·"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1348 "HTMLCharacterReference.gperf" + {"lesssim;", "≲"}, + {""}, + {""}, + {""}, + {""}, +#line 754 "HTMLCharacterReference.gperf" + {"bigstar;", "★"}, +#line 1939 "HTMLCharacterReference.gperf" + {"smt;", "⪪"}, +#line 2026 "HTMLCharacterReference.gperf" + {"supsetneq;", "⊋"}, +#line 2027 "HTMLCharacterReference.gperf" + {"supsetneqq;", "⫌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1899 "HTMLCharacterReference.gperf" + {"seArr;", "⇘"}, + {""}, +#line 52 "HTMLCharacterReference.gperf" + {"Because;", "∵"}, + {""}, + {""}, + {""}, + {""}, +#line 450 "HTMLCharacterReference.gperf" + {"Product;", "∏"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 329 "HTMLCharacterReference.gperf" + {"Mellintrf;", "ℳ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2125 "HTMLCharacterReference.gperf" + {"uparrow;", "↑"}, + {""}, + {""}, +#line 390 "HTMLCharacterReference.gperf" + {"NotSubset;", "⊂⃒"}, + {""}, +#line 2051 "HTMLCharacterReference.gperf" + {"thetav;", "ϑ"}, + {""}, +#line 1779 "HTMLCharacterReference.gperf" + {"rBarr;", "⤏"}, + {""}, +#line 288 "HTMLCharacterReference.gperf" + {"LeftRightVector;", "⥎"}, +#line 468 "HTMLCharacterReference.gperf" + {"Rcedil;", "Ŗ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2105 "HTMLCharacterReference.gperf" + {"ucy;", "у"}, + {""}, +#line 2011 "HTMLCharacterReference.gperf" + {"supE;", "⫆"}, + {""}, + {""}, +#line 133 "HTMLCharacterReference.gperf" + {"DoubleVerticalBar;", "∥"}, +#line 524 "HTMLCharacterReference.gperf" + {"SquareIntersection;", "⊓"}, + {""}, + {""}, + {""}, +#line 1781 "HTMLCharacterReference.gperf" + {"race;", "∽̱"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1776 "HTMLCharacterReference.gperf" + {"rAarr;", "⇛"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1364 "HTMLCharacterReference.gperf" + {"lmidot;", "ŀ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1777 "HTMLCharacterReference.gperf" + {"rArr;", "⇒"}, + {""}, + {""}, + {""}, +#line 194 "HTMLCharacterReference.gperf" + {"Gcedil;", "Ģ"}, + {""}, +#line 1943 "HTMLCharacterReference.gperf" + {"sol;", "/"}, +#line 420 "HTMLCharacterReference.gperf" + {"Oopf;", "𝕆"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 124 "HTMLCharacterReference.gperf" + {"DoubleLeftRightArrow;", "⇔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 477 "HTMLCharacterReference.gperf" + {"RightArrow;", "→"}, + 
{""}, + {""}, + {""}, + {""}, + {""}, +#line 1453 "HTMLCharacterReference.gperf" + {"minusb;", "⊟"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1745 "HTMLCharacterReference.gperf" + {"precnapprox;", "⪹"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1642 "HTMLCharacterReference.gperf" + {"ocy;", "о"}, +#line 1258 "HTMLCharacterReference.gperf" + {"jmath;", "ȷ"}, +#line 1920 "HTMLCharacterReference.gperf" + {"sigmaf;", "ς"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1714 "HTMLCharacterReference.gperf" + {"pi;", "π"}, + {""}, + {""}, + {""}, +#line 2164 "HTMLCharacterReference.gperf" + {"varsupsetneq;", "⊋︀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 672 "HTMLCharacterReference.gperf" + {"alefsym;", "ℵ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1754 "HTMLCharacterReference.gperf" + {"prod;", "∏"}, + {""}, +#line 1932 "HTMLCharacterReference.gperf" + {"simrarr;", "⥲"}, + {""}, +#line 1716 "HTMLCharacterReference.gperf" + {"piv;", "ϖ"}, + {""}, +#line 257 "HTMLCharacterReference.gperf" + {"Jukcy;", "Є"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 372 "HTMLCharacterReference.gperf" + {"NotLessEqual;", "≰"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 400 "HTMLCharacterReference.gperf" + {"NotTildeFullEqual;", "≇"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 356 "HTMLCharacterReference.gperf" + {"NotEqual;", "≠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1366 "HTMLCharacterReference.gperf" + {"lmoustache;", "⎰"}, +#line 338 "HTMLCharacterReference.gperf" + {"Ncedil;", "Ņ"}, + {""}, +#line 1234 "HTMLCharacterReference.gperf" + {"intlarhk;", "⨗"}, + {""}, + {""}, +#line 507 "HTMLCharacterReference.gperf" + {"SOFTcy;", "Ь"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1978 "HTMLCharacterReference.gperf" + {"subdot;", "⪽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 727 "HTMLCharacterReference.gperf" + {"backprime;", "‵"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 674 "HTMLCharacterReference.gperf" + {"alpha;", "α"}, + {""}, +#line 179 "HTMLCharacterReference.gperf" + {"ExponentialE;", "ⅇ"}, + {""}, + {""}, +#line 2170 "HTMLCharacterReference.gperf" + {"vdash;", "⊢"}, + {""}, + {""}, + {""}, + {""}, +#line 1824 "HTMLCharacterReference.gperf" + {"realine;", "ℛ"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1349 "HTMLCharacterReference.gperf" + {"lfisht;", "⥼"}, +#line 1559 "HTMLCharacterReference.gperf" + {"notnivc;", "⋽"}, + {""}, + {""}, + {""}, +#line 203 "HTMLCharacterReference.gperf" + {"GreaterFullEqual;", "≧"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 248 "HTMLCharacterReference.gperf" + {"Iukcy;", "І"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 138 "HTMLCharacterReference.gperf" + {"DownLeftRightVector;", "⥐"}, + {""}, + {""}, + {""}, + {""}, +#line 1223 "HTMLCharacterReference.gperf" + {"imof;", "⊷"}, +#line 1324 "HTMLCharacterReference.gperf" + {"leftharpoondown;", "↽"}, +#line 958 "HTMLCharacterReference.gperf" + {"dfisht;", "⥿"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 346 "HTMLCharacterReference.gperf" + {"NewLine;", "\n"}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2096 "HTMLCharacterReference.gperf" + {"uArr;", "⇑"}, + {""}, +#line 1222 "HTMLCharacterReference.gperf" + {"imath;", "ı"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 650 "HTMLCharacterReference.gperf" + {"ZeroWidthSpace;", "​"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 755 "HTMLCharacterReference.gperf" + {"bigtriangledown;", "▽"}, + {""}, +#line 1859 "HTMLCharacterReference.gperf" + {"ropf;", "𝕣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 312 "HTMLCharacterReference.gperf" + {"Lmidot;", "Ŀ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1432 "HTMLCharacterReference.gperf" + {"maltese;", "✠"}, + {""}, + {""}, + {""}, +#line 534 "HTMLCharacterReference.gperf" + {"SubsetEqual;", "⊆"}, +#line 2127 "HTMLCharacterReference.gperf" + {"upharpoonleft;", "↿"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 354 "HTMLCharacterReference.gperf" + {"NotDoubleVerticalBar;", "∦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1430 "HTMLCharacterReference.gperf" + {"male;", "♂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2165 "HTMLCharacterReference.gperf" + {"varsupsetneqq;", "⫌︀"}, + {""}, + {""}, +#line 2062 "HTMLCharacterReference.gperf" + {"timesb;", "⊠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2131 "HTMLCharacterReference.gperf" + {"upsih;", "ϒ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1546 "HTMLCharacterReference.gperf" + {"nmid;", "∤"}, + {""}, + {""}, +#line 201 "HTMLCharacterReference.gperf" + {"GreaterEqual;", "≥"}, + {""}, + {""}, +#line 417 "HTMLCharacterReference.gperf" + {"Omacr;", "Ō"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1558 "HTMLCharacterReference.gperf" + {"notnivb;", "⋾"}, +#line 2063 "HTMLCharacterReference.gperf" + {"timesbar;", "⨱"}, + {""}, + {""}, + {""}, +#line 1805 "HTMLCharacterReference.gperf" + {"rationals;", "ℚ"}, + {""}, + {""}, + {""}, + {""}, +#line 1681 "HTMLCharacterReference.gperf" + {"origof;", "⊶"}, +#line 341 "HTMLCharacterReference.gperf" + {"NegativeThickSpace;", "​"}, + {""}, + {""}, +#line 1784 "HTMLCharacterReference.gperf" + {"raemptyv;", "⦳"}, +#line 1785 "HTMLCharacterReference.gperf" + {"rang;", "⟩"}, + {""}, + {""}, + {""}, +#line 616 "HTMLCharacterReference.gperf" + {"VerticalBar;", "∣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2128 "HTMLCharacterReference.gperf" + {"upharpoonright;", "↾"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1796 "HTMLCharacterReference.gperf" + {"rarrfs;", "⤞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1864 "HTMLCharacterReference.gperf" + {"rppolint;", "⨒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 553 "HTMLCharacterReference.gperf" + {"Tcedil;", "Ţ"}, + {""}, + {""}, + {""}, +#line 805 "HTMLCharacterReference.gperf" + {"boxhD;", "╥"}, + {""}, +#line 1588 "HTMLCharacterReference.gperf" + {"nsqsube;", "⋢"}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2118 "HTMLCharacterReference.gperf" + {"ulcrop;", "⌏"}, + {""}, + {""}, + {""}, + {""}, +#line 2167 "HTMLCharacterReference.gperf" + {"vartriangleleft;", "⊲"}, +#line 1661 "HTMLCharacterReference.gperf" + {"oline;", "‾"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 608 "HTMLCharacterReference.gperf" + {"VDash;", "⊫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2116 "HTMLCharacterReference.gperf" + {"ulcorn;", "⌜"}, + {""}, +#line 2013 "HTMLCharacterReference.gperf" + {"supdsub;", "⫘"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2124 "HTMLCharacterReference.gperf" + {"uopf;", "𝕦"}, + {""}, + {""}, +#line 1264 "HTMLCharacterReference.gperf" + {"kappav;", "ϰ"}, +#line 1981 "HTMLCharacterReference.gperf" + {"submult;", "⫁"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 479 "HTMLCharacterReference.gperf" + {"RightArrowLeftArrow;", "⇄"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1507 "HTMLCharacterReference.gperf" + {"nequiv;", "≢"}, + {""}, + {""}, +#line 928 "HTMLCharacterReference.gperf" + {"curlyvee;", "⋎"}, +#line 1886 "HTMLCharacterReference.gperf" + {"sccue;", "≽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1986 "HTMLCharacterReference.gperf" + {"subset;", "⊂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1659 "HTMLCharacterReference.gperf" + {"olcir;", "⦾"}, +#line 237 "HTMLCharacterReference.gperf" + {"Implies;", "⇒"}, + {""}, +#line 284 "HTMLCharacterReference.gperf" + {"LeftDownVector;", "⇃"}, + {""}, + {""}, +#line 285 "HTMLCharacterReference.gperf" + {"LeftDownVectorBar;", "⥙"}, +#line 766 "HTMLCharacterReference.gperf" + {"blacktriangleright;", "▸"}, +#line 1803 "HTMLCharacterReference.gperf" + {"ratail;", "⤚"}, + {""}, +#line 1996 "HTMLCharacterReference.gperf" + {"succcurlyeq;", "≽"}, + {""}, + {""}, + {""}, +#line 1668 "HTMLCharacterReference.gperf" + {"oopf;", "𝕠"}, +#line 281 "HTMLCharacterReference.gperf" + {"LeftCeiling;", "⌈"}, + {""}, +#line 1787 "HTMLCharacterReference.gperf" + {"range;", "⦥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1831 "HTMLCharacterReference.gperf" + {"rfloor;", "⌋"}, + {""}, + {""}, +#line 617 "HTMLCharacterReference.gperf" + {"VerticalLine;", "|"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1944 "HTMLCharacterReference.gperf" + {"solb;", "⧄"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1500 "HTMLCharacterReference.gperf" + {"ndash;", "–"}, +#line 1574 "HTMLCharacterReference.gperf" + {"nrightarrow;", "↛"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1631 "HTMLCharacterReference.gperf" + {"nwarhk;", "⤣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 402 "HTMLCharacterReference.gperf" + {"NotVerticalBar;", "∤"}, +#line 311 "HTMLCharacterReference.gperf" + {"Lleftarrow;", "⇚"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2045 "HTMLCharacterReference.gperf" + {"telrec;", "⌕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2108 "HTMLCharacterReference.gperf" + {"udhar;", "⥮"}, + {""}, + {""}, +#line 1706 "HTMLCharacterReference.gperf" + {"permil;", "‰"}, + {""}, + {""}, + {""}, +#line 413 "HTMLCharacterReference.gperf" + {"Odblac;", "Ő"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 50 "HTMLCharacterReference.gperf" + {"Barwed;", "⌆"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1934 "HTMLCharacterReference.gperf" + {"smallsetminus;", "∖"}, +#line 1940 "HTMLCharacterReference.gperf" + {"smte;", "⪬"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 392 "HTMLCharacterReference.gperf" + {"NotSucceeds;", "⊁"}, + {""}, + {""}, + {""}, + {""}, +#line 1031 "HTMLCharacterReference.gperf" + {"empty;", "∅"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 215 "HTMLCharacterReference.gperf" + {"HilbertSpace;", "ℋ"}, +#line 1221 "HTMLCharacterReference.gperf" + {"imagpart;", "ℑ"}, + {""}, + {""}, +#line 1739 "HTMLCharacterReference.gperf" + {"prcue;", "≼"}, +#line 2117 "HTMLCharacterReference.gperf" + {"ulcorner;", "⌜"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1703 "HTMLCharacterReference.gperf" + {"pcy;", "п"}, + {""}, + {""}, +#line 146 "HTMLCharacterReference.gperf" + {"DownTeeArrow;", "↧"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1926 "HTMLCharacterReference.gperf" + {"simg;", "⪞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 501 "HTMLCharacterReference.gperf" + {"Rrightarrow;", "⇛"}, + {""}, +#line 1465 "HTMLCharacterReference.gperf" + {"multimap;", "⊸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 472 "HTMLCharacterReference.gperf" + {"ReverseEquilibrium;", "⇋"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 759 "HTMLCharacterReference.gperf" + {"bigwedge;", "⋀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 261 "HTMLCharacterReference.gperf" + {"Kcedil;", "Ķ"}, +#line 1622 "HTMLCharacterReference.gperf" + {"nvinfin;", "⧞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 763 "HTMLCharacterReference.gperf" + {"blacktriangle;", "▴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1987 "HTMLCharacterReference.gperf" + {"subseteq;", "⊆"}, +#line 1988 "HTMLCharacterReference.gperf" + {"subseteqq;", "⫅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1660 "HTMLCharacterReference.gperf" + {"olcross;", "⦻"}, + {""}, + {""}, +#line 202 "HTMLCharacterReference.gperf" + {"GreaterEqualLess;", "⋛"}, + {""}, + {""}, +#line 2120 "HTMLCharacterReference.gperf" + {"umacr;", "ū"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1880 "HTMLCharacterReference.gperf" + {"sacute;", "ś"}, + {""}, + {""}, +#line 1677 "HTMLCharacterReference.gperf" + {"ordf", "ª"}, 
+#line 1678 "HTMLCharacterReference.gperf" + {"ordf;", "ª"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 129 "HTMLCharacterReference.gperf" + {"DoubleRightArrow;", "⇒"}, + {""}, + {""}, + {""}, +#line 1938 "HTMLCharacterReference.gperf" + {"smile;", "⌣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1941 "HTMLCharacterReference.gperf" + {"smtes;", "⪬︀"}, +#line 1663 "HTMLCharacterReference.gperf" + {"omacr;", "ō"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1923 "HTMLCharacterReference.gperf" + {"simdot;", "⩪"}, + {""}, + {""}, + {""}, +#line 79 "HTMLCharacterReference.gperf" + {"CirclePlus;", "⊕"}, + {""}, + {""}, + {""}, +#line 1809 "HTMLCharacterReference.gperf" + {"rbrack;", "]"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1535 "HTMLCharacterReference.gperf" + {"nleftarrow;", "↚"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 80 "HTMLCharacterReference.gperf" + {"CircleTimes;", "⊗"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 593 "HTMLCharacterReference.gperf" + {"UpDownArrow;", "↕"}, + {""}, +#line 1786 "HTMLCharacterReference.gperf" + {"rangd;", "⦒"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1032 "HTMLCharacterReference.gperf" + {"emptyset;", "∅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 68 "HTMLCharacterReference.gperf" + {"Ccedil", "Ç"}, +#line 69 "HTMLCharacterReference.gperf" + {"Ccedil;", "Ç"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1927 "HTMLCharacterReference.gperf" + {"simgE;", "⪠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 34 "HTMLCharacterReference.gperf" + {"Alpha;", "Α"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 408 "HTMLCharacterReference.gperf" + {"Oacute", "Ó"}, +#line 409 "HTMLCharacterReference.gperf" + {"Oacute;", "Ó"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1503 "HTMLCharacterReference.gperf" + {"nearhk;", "⤤"}, + {""}, +#line 142 "HTMLCharacterReference.gperf" + {"DownRightTeeVector;", "⥟"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1708 "HTMLCharacterReference.gperf" + {"pertenk;", "‱"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2162 "HTMLCharacterReference.gperf" + {"varsubsetneq;", "⊊︀"}, 
+ {""}, + {""}, +#line 1536 "HTMLCharacterReference.gperf" + {"nleftrightarrow;", "↮"}, +#line 2156 "HTMLCharacterReference.gperf" + {"varphi;", "ϕ"}, +#line 1743 "HTMLCharacterReference.gperf" + {"preccurlyeq;", "≼"}, + {""}, +#line 1788 "HTMLCharacterReference.gperf" + {"rangle;", "⟩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1989 "HTMLCharacterReference.gperf" + {"subsetneq;", "⊊"}, +#line 1990 "HTMLCharacterReference.gperf" + {"subsetneqq;", "⫋"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2107 "HTMLCharacterReference.gperf" + {"udblac;", "ű"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 486 "HTMLCharacterReference.gperf" + {"RightTee;", "⊢"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 217 "HTMLCharacterReference.gperf" + {"HorizontalLine;", "─"}, +#line 2028 "HTMLCharacterReference.gperf" + {"supsim;", "⫈"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 488 "HTMLCharacterReference.gperf" + {"RightTeeVector;", "⥛"}, + {""}, + {""}, + {""}, + {""}, +#line 1948 "HTMLCharacterReference.gperf" + {"spadesuit;", "♠"}, + {""}, + {""}, + {""}, + {""}, +#line 1173 "HTMLCharacterReference.gperf" + {"half;", "½"}, + {""}, + {""}, + {""}, +#line 1893 "HTMLCharacterReference.gperf" + {"scpolint;", "⨓"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1977 "HTMLCharacterReference.gperf" + {"subE;", "⫅"}, + {""}, +#line 1215 "HTMLCharacterReference.gperf" + {"iinfin;", "⧜"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1644 "HTMLCharacterReference.gperf" + {"odblac;", "ő"}, + {""}, + {""}, + {""}, +#line 989 "HTMLCharacterReference.gperf" + {"downharpoonright;", "⇂"}, + {""}, + {""}, + {""}, +#line 1733 "HTMLCharacterReference.gperf" + {"popf;", "𝕡"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2030 "HTMLCharacterReference.gperf" + {"supsup;", "⫖"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 433 "HTMLCharacterReference.gperf" + {"OverBrace;", "⏞"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2101 "HTMLCharacterReference.gperf" + {"ubrcy;", "ў"}, +#line 1294 "HTMLCharacterReference.gperf" + {"larrhk;", "↩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 498 "HTMLCharacterReference.gperf" + {"Rightarrow;", "⇒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1890 "HTMLCharacterReference.gperf" + {"scnE;", "⪶"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 139 "HTMLCharacterReference.gperf" + {"DownLeftTeeVector;", "⥞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 77 "HTMLCharacterReference.gperf" + {"CircleDot;", "⊙"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 611 "HTMLCharacterReference.gperf" + {"Vdash;", "⊩"}, + {""}, + {""}, +#line 1731 "HTMLCharacterReference.gperf" + {"pm;", "±"}, + {""}, + {""}, +#line 1475 "HTMLCharacterReference.gperf" + {"nRightarrow;", "⇏"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1847 "HTMLCharacterReference.gperf" + {"ring;", "˚"}, + {""}, +#line 1782 "HTMLCharacterReference.gperf" + {"racute;", "ŕ"}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 612 "HTMLCharacterReference.gperf" + {"Vdashl;", "⫦"}, +#line 1626 "HTMLCharacterReference.gperf" + {"nvltrie;", "⊴⃒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 494 "HTMLCharacterReference.gperf" + {"RightUpVector;", "↾"}, +#line 21 "HTMLCharacterReference.gperf" + {"AElig", "Æ"}, +#line 22 "HTMLCharacterReference.gperf" + {"AElig;", "Æ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2163 "HTMLCharacterReference.gperf" + {"varsubsetneqq;", "⫋︀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2093 "HTMLCharacterReference.gperf" + {"twixt;", "≬"}, +#line 2001 "HTMLCharacterReference.gperf" + {"succsim;", "≿"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 425 "HTMLCharacterReference.gperf" + {"Oslash", "Ø"}, +#line 426 "HTMLCharacterReference.gperf" + {"Oslash;", "Ø"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1822 "HTMLCharacterReference.gperf" + {"rdsh;", "↳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 520 "HTMLCharacterReference.gperf" + {"SmallCircle;", "∘"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 478 "HTMLCharacterReference.gperf" + {"RightArrowBar;", "⇥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 53 "HTMLCharacterReference.gperf" + {"Bernoullis;", "ℬ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 619 "HTMLCharacterReference.gperf" + {"VerticalTilde;", "≀"}, + {""}, + {""}, + {""}, + {""}, +#line 357 "HTMLCharacterReference.gperf" + {"NotEqualTilde;", "≂̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 397 "HTMLCharacterReference.gperf" + {"NotSupersetEqual;", "⊉"}, + {""}, + {""}, + {""}, +#line 1751 "HTMLCharacterReference.gperf" + {"prnE;", "⪵"}, + {""}, + {""}, +#line 1850 "HTMLCharacterReference.gperf" + {"rlhar;", "⇌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1077 "HTMLCharacterReference.gperf" + {"ffllig;", "ffl"}, +#line 2035 "HTMLCharacterReference.gperf" + {"swnwar;", "⤪"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2053 "HTMLCharacterReference.gperf" + {"thicksim;", "∼"}, + {""}, + {""}, + {""}, +#line 2095 "HTMLCharacterReference.gperf" + {"twoheadrightarrow;", "↠"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2119 "HTMLCharacterReference.gperf" + {"ultri;", "◸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 434 "HTMLCharacterReference.gperf" + {"OverBracket;", "⎴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1318 "HTMLCharacterReference.gperf" + {"ldrdhar;", "⥧"}, + {""}, + {""}, +#line 1441 "HTMLCharacterReference.gperf" + {"mdash;", "—"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1187 "HTMLCharacterReference.gperf" + {"hkswarow;", "⤦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 143 "HTMLCharacterReference.gperf" + {"DownRightVector;", "⇁"}, + {""}, 
+#line 419 "HTMLCharacterReference.gperf" + {"Omicron;", "Ο"}, +#line 144 "HTMLCharacterReference.gperf" + {"DownRightVectorBar;", "⥗"}, +#line 2098 "HTMLCharacterReference.gperf" + {"uacute", "ú"}, +#line 2099 "HTMLCharacterReference.gperf" + {"uacute;", "ú"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 487 "HTMLCharacterReference.gperf" + {"RightTeeArrow;", "↦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 432 "HTMLCharacterReference.gperf" + {"OverBar;", "‾"}, +#line 1488 "HTMLCharacterReference.gperf" + {"naturals;", "ℕ"}, + {""}, + {""}, +#line 1265 "HTMLCharacterReference.gperf" + {"kcedil;", "ķ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 493 "HTMLCharacterReference.gperf" + {"RightUpTeeVector;", "⥜"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 764 "HTMLCharacterReference.gperf" + {"blacktriangledown;", "▾"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 358 "HTMLCharacterReference.gperf" + {"NotExists;", "∄"}, +#line 223 "HTMLCharacterReference.gperf" + {"IJlig;", "IJ"}, +#line 305 "HTMLCharacterReference.gperf" + {"LessGreater;", "≶"}, + {""}, + {""}, + {""}, + {""}, +#line 1636 "HTMLCharacterReference.gperf" + {"oacute", "ó"}, +#line 1637 "HTMLCharacterReference.gperf" + {"oacute;", "ó"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1757 "HTMLCharacterReference.gperf" + {"profsurf;", "⌓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1701 "HTMLCharacterReference.gperf" + {"parsl;", "⫽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 277 "HTMLCharacterReference.gperf" + {"LeftAngleBracket;", "⟨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1945 "HTMLCharacterReference.gperf" + {"solbar;", "⌿"}, + {""}, + {""}, + {""}, + {""}, +#line 78 "HTMLCharacterReference.gperf" + {"CircleMinus;", "⊖"}, + {""}, + {""}, + {""}, + {""}, +#line 91 "HTMLCharacterReference.gperf" + {"CounterClockwiseContourIntegral;", "∳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2017 "HTMLCharacterReference.gperf" + {"suphsub;", "⫗"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 883 "HTMLCharacterReference.gperf" + {"cirscir;", "⧂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1487 "HTMLCharacterReference.gperf" + {"natural;", "♮"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 110 "HTMLCharacterReference.gperf" + {"DiacriticalDot;", "˙"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 236 "HTMLCharacterReference.gperf" + {"ImaginaryI;", "ⅈ"}, + {""}, + {""}, + {""}, + {""}, +#line 591 "HTMLCharacterReference.gperf" + {"UpArrowBar;", "⤒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, +#line 394 "HTMLCharacterReference.gperf" + {"NotSucceedsSlantEqual;", "⋡"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1906 "HTMLCharacterReference.gperf" + {"seswar;", "⤩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 422 "HTMLCharacterReference.gperf" + {"OpenCurlyQuote;", "‘"}, + {""}, + {""}, +#line 2071 "HTMLCharacterReference.gperf" + {"topfork;", "⫚"}, + {""}, + {""}, +#line 435 "HTMLCharacterReference.gperf" + {"OverParenthesis;", "⏜"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 111 "HTMLCharacterReference.gperf" + {"DiacriticalDoubleAcute;", "˝"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1800 "HTMLCharacterReference.gperf" + {"rarrsim;", "⥴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2042 "HTMLCharacterReference.gperf" + {"tcedil;", "ţ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1848 "HTMLCharacterReference.gperf" + {"risingdotseq;", "≓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1648 "HTMLCharacterReference.gperf" + {"oelig;", "œ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 331 "HTMLCharacterReference.gperf" + {"MinusPlus;", "∓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1913 "HTMLCharacterReference.gperf" + {"shchcy;", "щ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1699 "HTMLCharacterReference.gperf" + {"parallel;", "∥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 476 "HTMLCharacterReference.gperf" + {"RightAngleBracket;", "⟩"}, + {""}, + {""}, + {""}, +#line 1686 "HTMLCharacterReference.gperf" + {"oslash", "ø"}, +#line 1687 "HTMLCharacterReference.gperf" + {"oslash;", "ø"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1748 "HTMLCharacterReference.gperf" + {"precsim;", "≾"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1582 "HTMLCharacterReference.gperf" + {"nshortparallel;", "∦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 592 "HTMLCharacterReference.gperf" + {"UpArrowDownArrow;", "⇅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2000 "HTMLCharacterReference.gperf" + {"succnsim;", "⋩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1665 "HTMLCharacterReference.gperf" + {"omicron;", "ο"}, + {""}, + {""}, + {""}, + {""}, +#line 881 "HTMLCharacterReference.gperf" + {"cirfnint;", "⨐"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 391 "HTMLCharacterReference.gperf" + {"NotSubsetEqual;", "⊈"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2036 "HTMLCharacterReference.gperf" + {"szlig", "ß"}, +#line 2037 "HTMLCharacterReference.gperf" + {"szlig;", "ß"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1921 "HTMLCharacterReference.gperf" + {"sigmav;", "ς"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1755 "HTMLCharacterReference.gperf" + {"profalar;", "⌮"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1732 "HTMLCharacterReference.gperf" + {"pointint;", "⨕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1888 "HTMLCharacterReference.gperf" + {"scedil;", "ş"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 314 "HTMLCharacterReference.gperf" + {"LongLeftRightArrow;", "⟷"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1852 "HTMLCharacterReference.gperf" + {"rmoust;", "⎱"}, + {""}, +#line 2091 "HTMLCharacterReference.gperf" + {"tshcy;", "ћ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2129 "HTMLCharacterReference.gperf" + {"uplus;", "⊎"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1778 "HTMLCharacterReference.gperf" + {"rAtail;", "⤜"}, + {""}, +#line 2029 "HTMLCharacterReference.gperf" + {"supsub;", "⫔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1072 "HTMLCharacterReference.gperf" + {"fallingdotseq;", "≒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 65 "HTMLCharacterReference.gperf" + {"CapitalDifferentialD;", "ⅅ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1671 "HTMLCharacterReference.gperf" + {"oplus;", "⊕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, +#line 283 "HTMLCharacterReference.gperf" + {"LeftDownTeeVector;", "⥡"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1991 "HTMLCharacterReference.gperf" + {"subsim;", "⫇"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 560 "HTMLCharacterReference.gperf" + {"Tilde;", "∼"}, + {""}, +#line 618 "HTMLCharacterReference.gperf" + {"VerticalSeparator;", "❘"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1436 "HTMLCharacterReference.gperf" + {"mapstoleft;", "↤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 492 "HTMLCharacterReference.gperf" + {"RightUpDownVector;", "⥏"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1993 "HTMLCharacterReference.gperf" + {"subsup;", "⫓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1892 "HTMLCharacterReference.gperf" + {"scnsim;", "⋩"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1719 "HTMLCharacterReference.gperf" + {"plankv;", "ℏ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2050 "HTMLCharacterReference.gperf" + {"thetasym;", "ϑ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 115 "HTMLCharacterReference.gperf" + {"DifferentialD;", "ⅆ"}, + {""}, +#line 366 "HTMLCharacterReference.gperf" + {"NotHumpDownHump;", "≎̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2161 "HTMLCharacterReference.gperf" + {"varsigma;", "ς"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1720 "HTMLCharacterReference.gperf" + {"plus;", "+"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1727 "HTMLCharacterReference.gperf" + {"plusmn", "±"}, +#line 1728 "HTMLCharacterReference.gperf" + {"plusmn;", "±"}, + {""}, +#line 1747 "HTMLCharacterReference.gperf" + {"precnsim;", "⋨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 765 "HTMLCharacterReference.gperf" + {"blacktriangleleft;", "◂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1753 "HTMLCharacterReference.gperf" + {"prnsim;", "⋨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1814 "HTMLCharacterReference.gperf" + {"rcedil;", "ŗ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1937 "HTMLCharacterReference.gperf" + {"smid;", "∣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 482 "HTMLCharacterReference.gperf" + {"RightDownTeeVector;", "⥝"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1726 "HTMLCharacterReference.gperf" + {"pluse;", "⩲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1730 "HTMLCharacterReference.gperf" + {"plustwo;", "⨧"}, + {""}, + {""}, + {""}, + {""}, +#line 1845 "HTMLCharacterReference.gperf" + {"rightsquigarrow;", "↝"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 436 "HTMLCharacterReference.gperf" + {"PartialD;", "∂"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 83 "HTMLCharacterReference.gperf" + {"CloseCurlyQuote;", "’"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 988 "HTMLCharacterReference.gperf" + {"downharpoonleft;", "⇃"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2056 "HTMLCharacterReference.gperf" + {"thksim;", "∼"}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1033 "HTMLCharacterReference.gperf" + {"emptyv;", "∅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2032 "HTMLCharacterReference.gperf" + {"swarhk;", "⤦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 349 "HTMLCharacterReference.gperf" + {"NonBreakingSpace;", " "}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 82 "HTMLCharacterReference.gperf" + {"CloseCurlyDoubleQuote;", "”"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1722 "HTMLCharacterReference.gperf" + {"plusb;", "⊞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1220 "HTMLCharacterReference.gperf" + {"imagline;", "ℐ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 383 "HTMLCharacterReference.gperf" + {"NotRightTriangle;", "⋫"}, + {""}, + {""}, +#line 384 "HTMLCharacterReference.gperf" + {"NotRightTriangleBar;", "⧐̸"}, + {""}, +#line 385 "HTMLCharacterReference.gperf" + {"NotRightTriangleEqual;", "⋭"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2075 "HTMLCharacterReference.gperf" + {"triangle;", "▵"}, +#line 2079 "HTMLCharacterReference.gperf" + {"triangleq;", "≜"}, + {""}, + {""}, +#line 2077 "HTMLCharacterReference.gperf" + {"triangleleft;", "◃"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2076 "HTMLCharacterReference.gperf" + {"triangledown;", "▿"}, +#line 1842 "HTMLCharacterReference.gperf" + {"rightleftarrows;", "⇄"}, + {""}, + {""}, +#line 496 "HTMLCharacterReference.gperf" + {"RightVector;", "⇀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2078 "HTMLCharacterReference.gperf" + {"trianglelefteq;", "⊴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 483 "HTMLCharacterReference.gperf" + {"RightDownVector;", "⇂"}, + {""}, + {""}, +#line 484 "HTMLCharacterReference.gperf" + {"RightDownVectorBar;", "⥕"}, + {""}, + {""}, + {""}, +#line 1725 "HTMLCharacterReference.gperf" + {"plusdu;", "⨥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 393 "HTMLCharacterReference.gperf" + {"NotSucceedsEqual;", "⪰̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1724 "HTMLCharacterReference.gperf" + {"plusdo;", "∔"}, + {""}, + {""}, + {""}, + {""}, +#line 382 "HTMLCharacterReference.gperf" + {"NotReverseElement;", "∌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 395 "HTMLCharacterReference.gperf" + {"NotSucceedsTilde;", "≿̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1843 "HTMLCharacterReference.gperf" + {"rightleftharpoons;", "⇌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1936 "HTMLCharacterReference.gperf" + {"smeparsl;", "⧤"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 421 "HTMLCharacterReference.gperf" + {"OpenCurlyDoubleQuote;", "“"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1974 "HTMLCharacterReference.gperf" + {"straightphi;", "ϕ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1900 "HTMLCharacterReference.gperf" + {"searhk;", "⤥"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1841 "HTMLCharacterReference.gperf" + {"rightharpoonup;", "⇀"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1666 "HTMLCharacterReference.gperf" + {"omid;", "⦶"}, + {""}, + {""}, + {""}, + {""}, +#line 418 "HTMLCharacterReference.gperf" + {"Omega;", "Ω"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1992 "HTMLCharacterReference.gperf" + {"subsub;", "⫕"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 367 "HTMLCharacterReference.gperf" + {"NotHumpEqual;", "≏̸"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1853 "HTMLCharacterReference.gperf" + {"rmoustache;", "⎱"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1715 "HTMLCharacterReference.gperf" + {"pitchfork;", "⋔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, 
+ {""}, +#line 2080 "HTMLCharacterReference.gperf" + {"triangleright;", "▹"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1830 "HTMLCharacterReference.gperf" + {"rfisht;", "⥽"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1840 "HTMLCharacterReference.gperf" + {"rightharpoondown;", "⇁"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 407 "HTMLCharacterReference.gperf" + {"OElig;", "Œ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 304 "HTMLCharacterReference.gperf" + {"LessFullEqual;", "≦"}, + {""}, +#line 485 "HTMLCharacterReference.gperf" + {"RightFloor;", "⌋"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1700 "HTMLCharacterReference.gperf" + {"parsim;", "⫳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1643 "HTMLCharacterReference.gperf" + {"odash;", "⊝"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1717 "HTMLCharacterReference.gperf" + {"planck;", "ℏ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2081 "HTMLCharacterReference.gperf" + {"trianglerighteq;", "⊵"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2109 "HTMLCharacterReference.gperf" + {"ufisht;", "⥾"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2016 "HTMLCharacterReference.gperf" + {"suphsol;", "⟉"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1442 "HTMLCharacterReference.gperf" + {"measuredangle;", "∡"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2059 "HTMLCharacterReference.gperf" + {"tilde;", "˜"}, + {""}, +#line 1667 "HTMLCharacterReference.gperf" + {"ominus;", "⊖"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2121 "HTMLCharacterReference.gperf" + {"uml", "¨"}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2122 "HTMLCharacterReference.gperf" + {"uml;", "¨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 165 "HTMLCharacterReference.gperf" + {"EmptySmallSquare;", "◻"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 495 "HTMLCharacterReference.gperf" + {"RightUpVectorBar;", "⥔"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 182 "HTMLCharacterReference.gperf" + {"FilledSmallSquare;", "◼"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1838 "HTMLCharacterReference.gperf" + {"rightarrow;", "→"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1664 "HTMLCharacterReference.gperf" + {"omega;", "ω"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2189 "HTMLCharacterReference.gperf" + {"vzigzag;", "⦚"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1844 "HTMLCharacterReference.gperf" + {"rightrightarrows;", "⇉"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 242 "HTMLCharacterReference.gperf" + {"InvisibleTimes;", "⁢"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 340 "HTMLCharacterReference.gperf" + {"NegativeMediumSpace;", "​"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 441 "HTMLCharacterReference.gperf" + {"PlusMinus;", "±"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 113 "HTMLCharacterReference.gperf" + {"DiacriticalTilde;", "˜"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 109 "HTMLCharacterReference.gperf" + {"DiacriticalAcute;", "´"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 308 "HTMLCharacterReference.gperf" + {"LessTilde;", "≲"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1797 "HTMLCharacterReference.gperf" + {"rarrhk;", "↪"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2094 "HTMLCharacterReference.gperf" + {"twoheadleftarrow;", "↞"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1756 "HTMLCharacterReference.gperf" + {"profline;", "⌒"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1721 "HTMLCharacterReference.gperf" + {"plusacir;", "⨣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 489 "HTMLCharacterReference.gperf" + {"RightTriangle;", "⊳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1935 "HTMLCharacterReference.gperf" + {"smashp;", "⨳"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 241 "HTMLCharacterReference.gperf" + {"InvisibleComma;", "⁣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1973 "HTMLCharacterReference.gperf" + {"straightepsilon;", "ϵ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 480 "HTMLCharacterReference.gperf" + {"RightCeiling;", "⌉"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 561 "HTMLCharacterReference.gperf" + {"TildeEqual;", "≃"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 497 "HTMLCharacterReference.gperf" + {"RightVectorBar;", "⥓"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 563 "HTMLCharacterReference.gperf" + {"TildeTilde;", "≈"}, + {""}, + {""}, +#line 481 "HTMLCharacterReference.gperf" + {"RightDoubleBracket;", "⟧"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 112 "HTMLCharacterReference.gperf" + {"DiacriticalGrave;", "`"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 183 "HTMLCharacterReference.gperf" + {"FilledVerySmallSquare;", "▪"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1718 "HTMLCharacterReference.gperf" + {"planckh;", "ℎ"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1729 "HTMLCharacterReference.gperf" + {"plussim;", "⨦"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1723 "HTMLCharacterReference.gperf" + {"pluscir;", "⨢"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 166 "HTMLCharacterReference.gperf" + {"EmptyVerySmallSquare;", "▫"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1878 "HTMLCharacterReference.gperf" + {"ruluhar;", "⥨"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, 
+ {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 594 "HTMLCharacterReference.gperf" + {"UpEquilibrium;", "⥮"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 504 "HTMLCharacterReference.gperf" + {"RuleDelayed;", "⧴"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 562 "HTMLCharacterReference.gperf" + {"TildeFullEqual;", "≅"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1839 "HTMLCharacterReference.gperf" + {"rightarrowtail;", "↣"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 491 "HTMLCharacterReference.gperf" + {"RightTriangleEqual;", "⊵"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1846 "HTMLCharacterReference.gperf" + {"rightthreetimes;", "⋌"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 490 "HTMLCharacterReference.gperf" + {"RightTriangleBar;", "⧐"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + 
{""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 2088 "HTMLCharacterReference.gperf" + {"trpezium;", "⏢"}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, + {""}, +#line 1819 "HTMLCharacterReference.gperf" + {"rdldhar;", "⥩"}}; + 
+    if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+        unsigned int key = hash(str, len);
+
+        if (key <= MAX_HASH_VALUE)
+        {
+            const char * s = wordlist[key].name;
+
+            if (*str == *s && !strncmp(str + 1, s + 1, len - 1) && s[len] == '\0')
+                return &wordlist[key];
+        }
+    }
+    return 0;
+}
+#line 2252 "HTMLCharacterReference.gperf"
+
+// NOLINTEND(google-runtime-int,hicpp-use-nullptr,modernize-use-nullptr)
diff --git a/src/Functions/HTMLCharacterReference.gperf b/src/Functions/HTMLCharacterReference.gperf
new file mode 100644
index 00000000000..4093688c608
--- /dev/null
+++ b/src/Functions/HTMLCharacterReference.gperf
@@ -0,0 +1,2253 @@
+%language=C++
+%define class-name HTMLCharacterHash
+%define lookup-function-name Lookup
+%readonly-tables
+%includes
+%compare-strncmp
+%{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+// NOLINTBEGIN(google-runtime-int,hicpp-use-nullptr,modernize-use-nullptr)
+%}
+struct NameAndGlyph {
+const char *name;
+const char *glyph;
+};
+%%
+"AElig", "Æ"
+"AElig;", "Æ"
+"AMP", "&"
+"AMP;", "&"
+"Aacute", "Á"
+"Aacute;", "Á"
+"Abreve;", "Ă"
+"Acirc", "Â"
+"Acirc;", "Â"
+"Acy;", "А"
+"Afr;", "𝔄"
+"Agrave", "À"
+"Agrave;", "À"
+"Alpha;", "Α"
+"Amacr;", "Ā"
+"And;", "⩓"
+"Aogon;", "Ą"
+"Aopf;", "𝔸"
+"ApplyFunction;", "⁡"
+"Aring", "Å"
+"Aring;", "Å"
+"Ascr;", "𝒜"
+"Assign;", "≔"
+"Atilde", "Ã"
+"Atilde;", "Ã"
+"Auml", "Ä"
+"Auml;", "Ä"
+"Backslash;", "∖"
+"Barv;", "⫧"
+"Barwed;", "⌆"
+"Bcy;", "Б"
+"Because;", "∵"
+"Bernoullis;", "ℬ"
+"Beta;", "Β"
+"Bfr;", "𝔅"
+"Bopf;", "𝔹"
+"Breve;", "˘"
+"Bscr;", "ℬ"
+"Bumpeq;", "≎"
+"CHcy;", "Ч"
+"COPY", "©"
+"COPY;", "©"
+"Cacute;", "Ć"
+"Cap;", "⋒"
+"CapitalDifferentialD;", "ⅅ"
+"Cayleys;", "ℭ"
+"Ccaron;", "Č"
+"Ccedil", "Ç"
+"Ccedil;", "Ç"
+"Ccirc;", "Ĉ"
+"Cconint;", "∰"
+"Cdot;", "Ċ"
+"Cedilla;", "¸"
+"CenterDot;", "·"
+"Cfr;", "ℭ"
+"Chi;", "Χ"
+"CircleDot;", "⊙"
+"CircleMinus;", "⊖"
+"CirclePlus;", "⊕"
+"CircleTimes;", "⊗"
+"ClockwiseContourIntegral;", "∲"
+"CloseCurlyDoubleQuote;", "”"
+"CloseCurlyQuote;", "’"
+"Colon;", "∷"
+"Colone;", "⩴"
+"Congruent;", "≡"
+"Conint;", "∯"
+"ContourIntegral;", "∮"
+"Copf;", "ℂ"
+"Coproduct;", "∐"
+"CounterClockwiseContourIntegral;", "∳"
+"Cross;", "⨯"
+"Cscr;", "𝒞"
+"Cup;", "⋓"
+"CupCap;", "≍"
+"DD;", "ⅅ"
+"DDotrahd;", "⤑"
+"DJcy;", "Ђ"
+"DScy;", "Ѕ"
+"DZcy;", "Џ"
+"Dagger;", "‡"
+"Darr;", "↡"
+"Dashv;", "⫤"
+"Dcaron;", "Ď"
+"Dcy;", "Д"
+"Del;", "∇"
+"Delta;", "Δ"
+"Dfr;", "𝔇"
+"DiacriticalAcute;", "´"
+"DiacriticalDot;", "˙"
+"DiacriticalDoubleAcute;", "˝"
+"DiacriticalGrave;", "`"
+"DiacriticalTilde;", "˜"
+"Diamond;", "⋄"
+"DifferentialD;", "ⅆ"
+"Dopf;", "𝔻"
+"Dot;", "¨"
+"DotDot;", "⃜"
+"DotEqual;", "≐"
+"DoubleContourIntegral;", "∯"
+"DoubleDot;", "¨"
+"DoubleDownArrow;", "⇓"
+"DoubleLeftArrow;", "⇐"
+"DoubleLeftRightArrow;", "⇔"
+"DoubleLeftTee;", "⫤"
+"DoubleLongLeftArrow;", "⟸"
+"DoubleLongLeftRightArrow;", "⟺"
+"DoubleLongRightArrow;", "⟹"
+"DoubleRightArrow;", "⇒"
+"DoubleRightTee;", "⊨"
+"DoubleUpArrow;", "⇑"
+"DoubleUpDownArrow;", "⇕"
+"DoubleVerticalBar;", "∥"
+"DownArrow;", "↓"
+"DownArrowBar;", "⤓"
+"DownArrowUpArrow;", "⇵"
+"DownBreve;", "̑"
+"DownLeftRightVector;", "⥐"
+"DownLeftTeeVector;", "⥞"
+"DownLeftVector;", "↽"
+"DownLeftVectorBar;", "⥖"
+"DownRightTeeVector;", "⥟"
+"DownRightVector;", "⇁"
+"DownRightVectorBar;", "⥗" +"DownTee;", "⊤" +"DownTeeArrow;", "↧" +"Downarrow;", "⇓" +"Dscr;", "𝒟" +"Dstrok;", "Đ" +"ENG;", "Ŋ" +"ETH", "Ð" +"ETH;", "Ð" +"Eacute", "É" +"Eacute;", "É" +"Ecaron;", "Ě" +"Ecirc", "Ê" +"Ecirc;", "Ê" +"Ecy;", "Э" +"Edot;", "Ė" +"Efr;", "𝔈" +"Egrave", "È" +"Egrave;", "È" +"Element;", "∈" +"Emacr;", "Ē" +"EmptySmallSquare;", "◻" +"EmptyVerySmallSquare;", "▫" +"Eogon;", "Ę" +"Eopf;", "𝔼" +"Epsilon;", "Ε" +"Equal;", "⩵" +"EqualTilde;", "≂" +"Equilibrium;", "⇌" +"Escr;", "ℰ" +"Esim;", "⩳" +"Eta;", "Η" +"Euml", "Ë" +"Euml;", "Ë" +"Exists;", "∃" +"ExponentialE;", "ⅇ" +"Fcy;", "Ф" +"Ffr;", "𝔉" +"FilledSmallSquare;", "◼" +"FilledVerySmallSquare;", "▪" +"Fopf;", "𝔽" +"ForAll;", "∀" +"Fouriertrf;", "ℱ" +"Fscr;", "ℱ" +"GJcy;", "Ѓ" +"GT", ">" +"GT;", ">" +"Gamma;", "Γ" +"Gammad;", "Ϝ" +"Gbreve;", "Ğ" +"Gcedil;", "Ģ" +"Gcirc;", "Ĝ" +"Gcy;", "Г" +"Gdot;", "Ġ" +"Gfr;", "𝔊" +"Gg;", "⋙" +"Gopf;", "𝔾" +"GreaterEqual;", "≥" +"GreaterEqualLess;", "⋛" +"GreaterFullEqual;", "≧" +"GreaterGreater;", "⪢" +"GreaterLess;", "≷" +"GreaterSlantEqual;", "⩾" +"GreaterTilde;", "≳" +"Gscr;", "𝒢" +"Gt;", "≫" +"HARDcy;", "Ъ" +"Hacek;", "ˇ" +"Hat;", "^" +"Hcirc;", "Ĥ" +"Hfr;", "ℌ" +"HilbertSpace;", "ℋ" +"Hopf;", "ℍ" +"HorizontalLine;", "─" +"Hscr;", "ℋ" +"Hstrok;", "Ħ" +"HumpDownHump;", "≎" +"HumpEqual;", "≏" +"IEcy;", "Е" +"IJlig;", "IJ" +"IOcy;", "Ё" +"Iacute", "Í" +"Iacute;", "Í" +"Icirc", "Î" +"Icirc;", "Î" +"Icy;", "И" +"Idot;", "İ" +"Ifr;", "ℑ" +"Igrave", "Ì" +"Igrave;", "Ì" +"Im;", "ℑ" +"Imacr;", "Ī" +"ImaginaryI;", "ⅈ" +"Implies;", "⇒" +"Int;", "∬" +"Integral;", "∫" +"Intersection;", "⋂" +"InvisibleComma;", "⁣" +"InvisibleTimes;", "⁢" +"Iogon;", "Į" +"Iopf;", "𝕀" +"Iota;", "Ι" +"Iscr;", "ℐ" +"Itilde;", "Ĩ" +"Iukcy;", "І" +"Iuml", "Ï" +"Iuml;", "Ï" +"Jcirc;", "Ĵ" +"Jcy;", "Й" +"Jfr;", "𝔍" +"Jopf;", "𝕁" +"Jscr;", "𝒥" +"Jsercy;", "Ј" +"Jukcy;", "Є" +"KHcy;", "Х" +"KJcy;", "Ќ" +"Kappa;", "Κ" +"Kcedil;", "Ķ" +"Kcy;", "К" +"Kfr;", "𝔎" +"Kopf;", "𝕂" +"Kscr;", "𝒦" +"LJcy;", "Љ" +"LT", "<" +"LT;", "<" +"Lacute;", "Ĺ" +"Lambda;", "Λ" +"Lang;", "⟪" +"Laplacetrf;", "ℒ" +"Larr;", "↞" +"Lcaron;", "Ľ" +"Lcedil;", "Ļ" +"Lcy;", "Л" +"LeftAngleBracket;", "⟨" +"LeftArrow;", "←" +"LeftArrowBar;", "⇤" +"LeftArrowRightArrow;", "⇆" +"LeftCeiling;", "⌈" +"LeftDoubleBracket;", "⟦" +"LeftDownTeeVector;", "⥡" +"LeftDownVector;", "⇃" +"LeftDownVectorBar;", "⥙" +"LeftFloor;", "⌊" +"LeftRightArrow;", "↔" +"LeftRightVector;", "⥎" +"LeftTee;", "⊣" +"LeftTeeArrow;", "↤" +"LeftTeeVector;", "⥚" +"LeftTriangle;", "⊲" +"LeftTriangleBar;", "⧏" +"LeftTriangleEqual;", "⊴" +"LeftUpDownVector;", "⥑" +"LeftUpTeeVector;", "⥠" +"LeftUpVector;", "↿" +"LeftUpVectorBar;", "⥘" +"LeftVector;", "↼" +"LeftVectorBar;", "⥒" +"Leftarrow;", "⇐" +"Leftrightarrow;", "⇔" +"LessEqualGreater;", "⋚" +"LessFullEqual;", "≦" +"LessGreater;", "≶" +"LessLess;", "⪡" +"LessSlantEqual;", "⩽" +"LessTilde;", "≲" +"Lfr;", "𝔏" +"Ll;", "⋘" +"Lleftarrow;", "⇚" +"Lmidot;", "Ŀ" +"LongLeftArrow;", "⟵" +"LongLeftRightArrow;", "⟷" +"LongRightArrow;", "⟶" +"Longleftarrow;", "⟸" +"Longleftrightarrow;", "⟺" +"Longrightarrow;", "⟹" +"Lopf;", "𝕃" +"LowerLeftArrow;", "↙" +"LowerRightArrow;", "↘" +"Lscr;", "ℒ" +"Lsh;", "↰" +"Lstrok;", "Ł" +"Lt;", "≪" +"Map;", "⤅" +"Mcy;", "М" +"MediumSpace;", " " +"Mellintrf;", "ℳ" +"Mfr;", "𝔐" +"MinusPlus;", "∓" +"Mopf;", "𝕄" +"Mscr;", "ℳ" +"Mu;", "Μ" +"NJcy;", "Њ" +"Nacute;", "Ń" +"Ncaron;", "Ň" +"Ncedil;", "Ņ" +"Ncy;", "Н" +"NegativeMediumSpace;", "​" +"NegativeThickSpace;", "​" +"NegativeThinSpace;", "​" +"NegativeVeryThinSpace;", "​" 
+"NestedGreaterGreater;", "≫" +"NestedLessLess;", "≪" +"NewLine;", "\n" +"Nfr;", "𝔑" +"NoBreak;", "⁠" +"NonBreakingSpace;", " " +"Nopf;", "ℕ" +"Not;", "⫬" +"NotCongruent;", "≢" +"NotCupCap;", "≭" +"NotDoubleVerticalBar;", "∦" +"NotElement;", "∉" +"NotEqual;", "≠" +"NotEqualTilde;", "≂̸" +"NotExists;", "∄" +"NotGreater;", "≯" +"NotGreaterEqual;", "≱" +"NotGreaterFullEqual;", "≧̸" +"NotGreaterGreater;", "≫̸" +"NotGreaterLess;", "≹" +"NotGreaterSlantEqual;", "⩾̸" +"NotGreaterTilde;", "≵" +"NotHumpDownHump;", "≎̸" +"NotHumpEqual;", "≏̸" +"NotLeftTriangle;", "⋪" +"NotLeftTriangleBar;", "⧏̸" +"NotLeftTriangleEqual;", "⋬" +"NotLess;", "≮" +"NotLessEqual;", "≰" +"NotLessGreater;", "≸" +"NotLessLess;", "≪̸" +"NotLessSlantEqual;", "⩽̸" +"NotLessTilde;", "≴" +"NotNestedGreaterGreater;", "⪢̸" +"NotNestedLessLess;", "⪡̸" +"NotPrecedes;", "⊀" +"NotPrecedesEqual;", "⪯̸" +"NotPrecedesSlantEqual;", "⋠" +"NotReverseElement;", "∌" +"NotRightTriangle;", "⋫" +"NotRightTriangleBar;", "⧐̸" +"NotRightTriangleEqual;", "⋭" +"NotSquareSubset;", "⊏̸" +"NotSquareSubsetEqual;", "⋢" +"NotSquareSuperset;", "⊐̸" +"NotSquareSupersetEqual;", "⋣" +"NotSubset;", "⊂⃒" +"NotSubsetEqual;", "⊈" +"NotSucceeds;", "⊁" +"NotSucceedsEqual;", "⪰̸" +"NotSucceedsSlantEqual;", "⋡" +"NotSucceedsTilde;", "≿̸" +"NotSuperset;", "⊃⃒" +"NotSupersetEqual;", "⊉" +"NotTilde;", "≁" +"NotTildeEqual;", "≄" +"NotTildeFullEqual;", "≇" +"NotTildeTilde;", "≉" +"NotVerticalBar;", "∤" +"Nscr;", "𝒩" +"Ntilde", "Ñ" +"Ntilde;", "Ñ" +"Nu;", "Ν" +"OElig;", "Œ" +"Oacute", "Ó" +"Oacute;", "Ó" +"Ocirc", "Ô" +"Ocirc;", "Ô" +"Ocy;", "О" +"Odblac;", "Ő" +"Ofr;", "𝔒" +"Ograve", "Ò" +"Ograve;", "Ò" +"Omacr;", "Ō" +"Omega;", "Ω" +"Omicron;", "Ο" +"Oopf;", "𝕆" +"OpenCurlyDoubleQuote;", "“" +"OpenCurlyQuote;", "‘" +"Or;", "⩔" +"Oscr;", "𝒪" +"Oslash", "Ø" +"Oslash;", "Ø" +"Otilde", "Õ" +"Otilde;", "Õ" +"Otimes;", "⨷" +"Ouml", "Ö" +"Ouml;", "Ö" +"OverBar;", "‾" +"OverBrace;", "⏞" +"OverBracket;", "⎴" +"OverParenthesis;", "⏜" +"PartialD;", "∂" +"Pcy;", "П" +"Pfr;", "𝔓" +"Phi;", "Φ" +"Pi;", "Π" +"PlusMinus;", "±" +"Poincareplane;", "ℌ" +"Popf;", "ℙ" +"Pr;", "⪻" +"Precedes;", "≺" +"PrecedesEqual;", "⪯" +"PrecedesSlantEqual;", "≼" +"PrecedesTilde;", "≾" +"Prime;", "″" +"Product;", "∏" +"Proportion;", "∷" +"Proportional;", "∝" +"Pscr;", "𝒫" +"Psi;", "Ψ" +"QUOT", "\"" +"QUOT;", "\"" +"Qfr;", "𝔔" +"Qopf;", "ℚ" +"Qscr;", "𝒬" +"RBarr;", "⤐" +"REG", "®" +"REG;", "®" +"Racute;", "Ŕ" +"Rang;", "⟫" +"Rarr;", "↠" +"Rarrtl;", "⤖" +"Rcaron;", "Ř" +"Rcedil;", "Ŗ" +"Rcy;", "Р" +"Re;", "ℜ" +"ReverseElement;", "∋" +"ReverseEquilibrium;", "⇋" +"ReverseUpEquilibrium;", "⥯" +"Rfr;", "ℜ" +"Rho;", "Ρ" +"RightAngleBracket;", "⟩" +"RightArrow;", "→" +"RightArrowBar;", "⇥" +"RightArrowLeftArrow;", "⇄" +"RightCeiling;", "⌉" +"RightDoubleBracket;", "⟧" +"RightDownTeeVector;", "⥝" +"RightDownVector;", "⇂" +"RightDownVectorBar;", "⥕" +"RightFloor;", "⌋" +"RightTee;", "⊢" +"RightTeeArrow;", "↦" +"RightTeeVector;", "⥛" +"RightTriangle;", "⊳" +"RightTriangleBar;", "⧐" +"RightTriangleEqual;", "⊵" +"RightUpDownVector;", "⥏" +"RightUpTeeVector;", "⥜" +"RightUpVector;", "↾" +"RightUpVectorBar;", "⥔" +"RightVector;", "⇀" +"RightVectorBar;", "⥓" +"Rightarrow;", "⇒" +"Ropf;", "ℝ" +"RoundImplies;", "⥰" +"Rrightarrow;", "⇛" +"Rscr;", "ℛ" +"Rsh;", "↱" +"RuleDelayed;", "⧴" +"SHCHcy;", "Щ" +"SHcy;", "Ш" +"SOFTcy;", "Ь" +"Sacute;", "Ś" +"Sc;", "⪼" +"Scaron;", "Š" +"Scedil;", "Ş" +"Scirc;", "Ŝ" +"Scy;", "С" +"Sfr;", "𝔖" +"ShortDownArrow;", "↓" +"ShortLeftArrow;", "←" +"ShortRightArrow;", "→" +"ShortUpArrow;", "↑" 
+"Sigma;", "Σ" +"SmallCircle;", "∘" +"Sopf;", "𝕊" +"Sqrt;", "√" +"Square;", "□" +"SquareIntersection;", "⊓" +"SquareSubset;", "⊏" +"SquareSubsetEqual;", "⊑" +"SquareSuperset;", "⊐" +"SquareSupersetEqual;", "⊒" +"SquareUnion;", "⊔" +"Sscr;", "𝒮" +"Star;", "⋆" +"Sub;", "⋐" +"Subset;", "⋐" +"SubsetEqual;", "⊆" +"Succeeds;", "≻" +"SucceedsEqual;", "⪰" +"SucceedsSlantEqual;", "≽" +"SucceedsTilde;", "≿" +"SuchThat;", "∋" +"Sum;", "∑" +"Sup;", "⋑" +"Superset;", "⊃" +"SupersetEqual;", "⊇" +"Supset;", "⋑" +"THORN", "Þ" +"THORN;", "Þ" +"TRADE;", "™" +"TSHcy;", "Ћ" +"TScy;", "Ц" +"Tab;", "\t" +"Tau;", "Τ" +"Tcaron;", "Ť" +"Tcedil;", "Ţ" +"Tcy;", "Т" +"Tfr;", "𝔗" +"Therefore;", "∴" +"Theta;", "Θ" +"ThickSpace;", "  " +"ThinSpace;", " " +"Tilde;", "∼" +"TildeEqual;", "≃" +"TildeFullEqual;", "≅" +"TildeTilde;", "≈" +"Topf;", "𝕋" +"TripleDot;", "⃛" +"Tscr;", "𝒯" +"Tstrok;", "Ŧ" +"Uacute", "Ú" +"Uacute;", "Ú" +"Uarr;", "↟" +"Uarrocir;", "⥉" +"Ubrcy;", "Ў" +"Ubreve;", "Ŭ" +"Ucirc", "Û" +"Ucirc;", "Û" +"Ucy;", "У" +"Udblac;", "Ű" +"Ufr;", "𝔘" +"Ugrave", "Ù" +"Ugrave;", "Ù" +"Umacr;", "Ū" +"UnderBar;", "_" +"UnderBrace;", "⏟" +"UnderBracket;", "⎵" +"UnderParenthesis;", "⏝" +"Union;", "⋃" +"UnionPlus;", "⊎" +"Uogon;", "Ų" +"Uopf;", "𝕌" +"UpArrow;", "↑" +"UpArrowBar;", "⤒" +"UpArrowDownArrow;", "⇅" +"UpDownArrow;", "↕" +"UpEquilibrium;", "⥮" +"UpTee;", "⊥" +"UpTeeArrow;", "↥" +"Uparrow;", "⇑" +"Updownarrow;", "⇕" +"UpperLeftArrow;", "↖" +"UpperRightArrow;", "↗" +"Upsi;", "ϒ" +"Upsilon;", "Υ" +"Uring;", "Ů" +"Uscr;", "𝒰" +"Utilde;", "Ũ" +"Uuml", "Ü" +"Uuml;", "Ü" +"VDash;", "⊫" +"Vbar;", "⫫" +"Vcy;", "В" +"Vdash;", "⊩" +"Vdashl;", "⫦" +"Vee;", "⋁" +"Verbar;", "‖" +"Vert;", "‖" +"VerticalBar;", "∣" +"VerticalLine;", "|" +"VerticalSeparator;", "❘" +"VerticalTilde;", "≀" +"VeryThinSpace;", " " +"Vfr;", "𝔙" +"Vopf;", "𝕍" +"Vscr;", "𝒱" +"Vvdash;", "⊪" +"Wcirc;", "Ŵ" +"Wedge;", "⋀" +"Wfr;", "𝔚" +"Wopf;", "𝕎" +"Wscr;", "𝒲" +"Xfr;", "𝔛" +"Xi;", "Ξ" +"Xopf;", "𝕏" +"Xscr;", "𝒳" +"YAcy;", "Я" +"YIcy;", "Ї" +"YUcy;", "Ю" +"Yacute", "Ý" +"Yacute;", "Ý" +"Ycirc;", "Ŷ" +"Ycy;", "Ы" +"Yfr;", "𝔜" +"Yopf;", "𝕐" +"Yscr;", "𝒴" +"Yuml;", "Ÿ" +"ZHcy;", "Ж" +"Zacute;", "Ź" +"Zcaron;", "Ž" +"Zcy;", "З" +"Zdot;", "Ż" +"ZeroWidthSpace;", "​" +"Zeta;", "Ζ" +"Zfr;", "ℨ" +"Zopf;", "ℤ" +"Zscr;", "𝒵" +"aacute", "á" +"aacute;", "á" +"abreve;", "ă" +"ac;", "∾" +"acE;", "∾̳" +"acd;", "∿" +"acirc", "â" +"acirc;", "â" +"acute", "´" +"acute;", "´" +"acy;", "а" +"aelig", "æ" +"aelig;", "æ" +"af;", "⁡" +"afr;", "𝔞" +"agrave", "à" +"agrave;", "à" +"alefsym;", "ℵ" +"aleph;", "ℵ" +"alpha;", "α" +"amacr;", "ā" +"amalg;", "⨿" +"amp", "&" +"amp;", "&" +"and;", "∧" +"andand;", "⩕" +"andd;", "⩜" +"andslope;", "⩘" +"andv;", "⩚" +"ang;", "∠" +"ange;", "⦤" +"angle;", "∠" +"angmsd;", "∡" +"angmsdaa;", "⦨" +"angmsdab;", "⦩" +"angmsdac;", "⦪" +"angmsdad;", "⦫" +"angmsdae;", "⦬" +"angmsdaf;", "⦭" +"angmsdag;", "⦮" +"angmsdah;", "⦯" +"angrt;", "∟" +"angrtvb;", "⊾" +"angrtvbd;", "⦝" +"angsph;", "∢" +"angst;", "Å" +"angzarr;", "⍼" +"aogon;", "ą" +"aopf;", "𝕒" +"ap;", "≈" +"apE;", "⩰" +"apacir;", "⩯" +"ape;", "≊" +"apid;", "≋" +"apos;", "'" +"approx;", "≈" +"approxeq;", "≊" +"aring", "å" +"aring;", "å" +"ascr;", "𝒶" +"ast;", "*" +"asymp;", "≈" +"asympeq;", "≍" +"atilde", "ã" +"atilde;", "ã" +"auml", "ä" +"auml;", "ä" +"awconint;", "∳" +"awint;", "⨑" +"bNot;", "⫭" +"backcong;", "≌" +"backepsilon;", "϶" +"backprime;", "‵" +"backsim;", "∽" +"backsimeq;", "⋍" +"barvee;", "⊽" +"barwed;", "⌅" +"barwedge;", "⌅" +"bbrk;", "⎵" +"bbrktbrk;", "⎶" +"bcong;", "≌" +"bcy;", "б" 
+"bdquo;", "„" +"becaus;", "∵" +"because;", "∵" +"bemptyv;", "⦰" +"bepsi;", "϶" +"bernou;", "ℬ" +"beta;", "β" +"beth;", "ℶ" +"between;", "≬" +"bfr;", "𝔟" +"bigcap;", "⋂" +"bigcirc;", "◯" +"bigcup;", "⋃" +"bigodot;", "⨀" +"bigoplus;", "⨁" +"bigotimes;", "⨂" +"bigsqcup;", "⨆" +"bigstar;", "★" +"bigtriangledown;", "▽" +"bigtriangleup;", "△" +"biguplus;", "⨄" +"bigvee;", "⋁" +"bigwedge;", "⋀" +"bkarow;", "⤍" +"blacklozenge;", "⧫" +"blacksquare;", "▪" +"blacktriangle;", "▴" +"blacktriangledown;", "▾" +"blacktriangleleft;", "◂" +"blacktriangleright;", "▸" +"blank;", "␣" +"blk12;", "▒" +"blk14;", "░" +"blk34;", "▓" +"block;", "█" +"bne;", "=⃥" +"bnequiv;", "≡⃥" +"bnot;", "⌐" +"bopf;", "𝕓" +"bot;", "⊥" +"bottom;", "⊥" +"bowtie;", "⋈" +"boxDL;", "╗" +"boxDR;", "╔" +"boxDl;", "╖" +"boxDr;", "╓" +"boxH;", "═" +"boxHD;", "╦" +"boxHU;", "╩" +"boxHd;", "╤" +"boxHu;", "╧" +"boxUL;", "╝" +"boxUR;", "╚" +"boxUl;", "╜" +"boxUr;", "╙" +"boxV;", "║" +"boxVH;", "╬" +"boxVL;", "╣" +"boxVR;", "╠" +"boxVh;", "╫" +"boxVl;", "╢" +"boxVr;", "╟" +"boxbox;", "⧉" +"boxdL;", "╕" +"boxdR;", "╒" +"boxdl;", "┐" +"boxdr;", "┌" +"boxh;", "─" +"boxhD;", "╥" +"boxhU;", "╨" +"boxhd;", "┬" +"boxhu;", "┴" +"boxminus;", "⊟" +"boxplus;", "⊞" +"boxtimes;", "⊠" +"boxuL;", "╛" +"boxuR;", "╘" +"boxul;", "┘" +"boxur;", "└" +"boxv;", "│" +"boxvH;", "╪" +"boxvL;", "╡" +"boxvR;", "╞" +"boxvh;", "┼" +"boxvl;", "┤" +"boxvr;", "├" +"bprime;", "‵" +"breve;", "˘" +"brvbar", "¦" +"brvbar;", "¦" +"bscr;", "𝒷" +"bsemi;", "⁏" +"bsim;", "∽" +"bsime;", "⋍" +"bsol;", "\\" +"bsolb;", "⧅" +"bsolhsub;", "⟈" +"bull;", "•" +"bullet;", "•" +"bump;", "≎" +"bumpE;", "⪮" +"bumpe;", "≏" +"bumpeq;", "≏" +"cacute;", "ć" +"cap;", "∩" +"capand;", "⩄" +"capbrcup;", "⩉" +"capcap;", "⩋" +"capcup;", "⩇" +"capdot;", "⩀" +"caps;", "∩︀" +"caret;", "⁁" +"caron;", "ˇ" +"ccaps;", "⩍" +"ccaron;", "č" +"ccedil", "ç" +"ccedil;", "ç" +"ccirc;", "ĉ" +"ccups;", "⩌" +"ccupssm;", "⩐" +"cdot;", "ċ" +"cedil", "¸" +"cedil;", "¸" +"cemptyv;", "⦲" +"cent", "¢" +"cent;", "¢" +"centerdot;", "·" +"cfr;", "𝔠" +"chcy;", "ч" +"check;", "✓" +"checkmark;", "✓" +"chi;", "χ" +"cir;", "○" +"cirE;", "⧃" +"circ;", "ˆ" +"circeq;", "≗" +"circlearrowleft;", "↺" +"circlearrowright;", "↻" +"circledR;", "®" +"circledS;", "Ⓢ" +"circledast;", "⊛" +"circledcirc;", "⊚" +"circleddash;", "⊝" +"cire;", "≗" +"cirfnint;", "⨐" +"cirmid;", "⫯" +"cirscir;", "⧂" +"clubs;", "♣" +"clubsuit;", "♣" +"colon;", ":" +"colone;", "≔" +"coloneq;", "≔" +"comma;", "," +"commat;", "@" +"comp;", "∁" +"compfn;", "∘" +"complement;", "∁" +"complexes;", "ℂ" +"cong;", "≅" +"congdot;", "⩭" +"conint;", "∮" +"copf;", "𝕔" +"coprod;", "∐" +"copy", "©" +"copy;", "©" +"copysr;", "℗" +"crarr;", "↵" +"cross;", "✗" +"cscr;", "𝒸" +"csub;", "⫏" +"csube;", "⫑" +"csup;", "⫐" +"csupe;", "⫒" +"ctdot;", "⋯" +"cudarrl;", "⤸" +"cudarrr;", "⤵" +"cuepr;", "⋞" +"cuesc;", "⋟" +"cularr;", "↶" +"cularrp;", "⤽" +"cup;", "∪" +"cupbrcap;", "⩈" +"cupcap;", "⩆" +"cupcup;", "⩊" +"cupdot;", "⊍" +"cupor;", "⩅" +"cups;", "∪︀" +"curarr;", "↷" +"curarrm;", "⤼" +"curlyeqprec;", "⋞" +"curlyeqsucc;", "⋟" +"curlyvee;", "⋎" +"curlywedge;", "⋏" +"curren", "¤" +"curren;", "¤" +"curvearrowleft;", "↶" +"curvearrowright;", "↷" +"cuvee;", "⋎" +"cuwed;", "⋏" +"cwconint;", "∲" +"cwint;", "∱" +"cylcty;", "⌭" +"dArr;", "⇓" +"dHar;", "⥥" +"dagger;", "†" +"daleth;", "ℸ" +"darr;", "↓" +"dash;", "‐" +"dashv;", "⊣" +"dbkarow;", "⤏" +"dblac;", "˝" +"dcaron;", "ď" +"dcy;", "д" +"dd;", "ⅆ" +"ddagger;", "‡" +"ddarr;", "⇊" +"ddotseq;", "⩷" +"deg", "°" +"deg;", "°" +"delta;", "δ" +"demptyv;", "⦱" 
+"dfisht;", "⥿" +"dfr;", "𝔡" +"dharl;", "⇃" +"dharr;", "⇂" +"diam;", "⋄" +"diamond;", "⋄" +"diamondsuit;", "♦" +"diams;", "♦" +"die;", "¨" +"digamma;", "ϝ" +"disin;", "⋲" +"div;", "÷" +"divide", "÷" +"divide;", "÷" +"divideontimes;", "⋇" +"divonx;", "⋇" +"djcy;", "ђ" +"dlcorn;", "⌞" +"dlcrop;", "⌍" +"dollar;", "$" +"dopf;", "𝕕" +"dot;", "˙" +"doteq;", "≐" +"doteqdot;", "≑" +"dotminus;", "∸" +"dotplus;", "∔" +"dotsquare;", "⊡" +"doublebarwedge;", "⌆" +"downarrow;", "↓" +"downdownarrows;", "⇊" +"downharpoonleft;", "⇃" +"downharpoonright;", "⇂" +"drbkarow;", "⤐" +"drcorn;", "⌟" +"drcrop;", "⌌" +"dscr;", "𝒹" +"dscy;", "ѕ" +"dsol;", "⧶" +"dstrok;", "đ" +"dtdot;", "⋱" +"dtri;", "▿" +"dtrif;", "▾" +"duarr;", "⇵" +"duhar;", "⥯" +"dwangle;", "⦦" +"dzcy;", "џ" +"dzigrarr;", "⟿" +"eDDot;", "⩷" +"eDot;", "≑" +"eacute", "é" +"eacute;", "é" +"easter;", "⩮" +"ecaron;", "ě" +"ecir;", "≖" +"ecirc", "ê" +"ecirc;", "ê" +"ecolon;", "≕" +"ecy;", "э" +"edot;", "ė" +"ee;", "ⅇ" +"efDot;", "≒" +"efr;", "𝔢" +"eg;", "⪚" +"egrave", "è" +"egrave;", "è" +"egs;", "⪖" +"egsdot;", "⪘" +"el;", "⪙" +"elinters;", "⏧" +"ell;", "ℓ" +"els;", "⪕" +"elsdot;", "⪗" +"emacr;", "ē" +"empty;", "∅" +"emptyset;", "∅" +"emptyv;", "∅" +"emsp13;", " " +"emsp14;", " " +"emsp;", " " +"eng;", "ŋ" +"ensp;", " " +"eogon;", "ę" +"eopf;", "𝕖" +"epar;", "⋕" +"eparsl;", "⧣" +"eplus;", "⩱" +"epsi;", "ε" +"epsilon;", "ε" +"epsiv;", "ϵ" +"eqcirc;", "≖" +"eqcolon;", "≕" +"eqsim;", "≂" +"eqslantgtr;", "⪖" +"eqslantless;", "⪕" +"equals;", "=" +"equest;", "≟" +"equiv;", "≡" +"equivDD;", "⩸" +"eqvparsl;", "⧥" +"erDot;", "≓" +"erarr;", "⥱" +"escr;", "ℯ" +"esdot;", "≐" +"esim;", "≂" +"eta;", "η" +"eth", "ð" +"eth;", "ð" +"euml", "ë" +"euml;", "ë" +"euro;", "€" +"excl;", "!" +"exist;", "∃" +"expectation;", "ℰ" +"exponentiale;", "ⅇ" +"fallingdotseq;", "≒" +"fcy;", "ф" +"female;", "♀" +"ffilig;", "ffi" +"fflig;", "ff" +"ffllig;", "ffl" +"ffr;", "𝔣" +"filig;", "fi" +"fjlig;", "fj" +"flat;", "♭" +"fllig;", "fl" +"fltns;", "▱" +"fnof;", "ƒ" +"fopf;", "𝕗" +"forall;", "∀" +"fork;", "⋔" +"forkv;", "⫙" +"fpartint;", "⨍" +"frac12", "½" +"frac12;", "½" +"frac13;", "⅓" +"frac14", "¼" +"frac14;", "¼" +"frac15;", "⅕" +"frac16;", "⅙" +"frac18;", "⅛" +"frac23;", "⅔" +"frac25;", "⅖" +"frac34", "¾" +"frac34;", "¾" +"frac35;", "⅗" +"frac38;", "⅜" +"frac45;", "⅘" +"frac56;", "⅚" +"frac58;", "⅝" +"frac78;", "⅞" +"frasl;", "⁄" +"frown;", "⌢" +"fscr;", "𝒻" +"gE;", "≧" +"gEl;", "⪌" +"gacute;", "ǵ" +"gamma;", "γ" +"gammad;", "ϝ" +"gap;", "⪆" +"gbreve;", "ğ" +"gcirc;", "ĝ" +"gcy;", "г" +"gdot;", "ġ" +"ge;", "≥" +"gel;", "⋛" +"geq;", "≥" +"geqq;", "≧" +"geqslant;", "⩾" +"ges;", "⩾" +"gescc;", "⪩" +"gesdot;", "⪀" +"gesdoto;", "⪂" +"gesdotol;", "⪄" +"gesl;", "⋛︀" +"gesles;", "⪔" +"gfr;", "𝔤" +"gg;", "≫" +"ggg;", "⋙" +"gimel;", "ℷ" +"gjcy;", "ѓ" +"gl;", "≷" +"glE;", "⪒" +"gla;", "⪥" +"glj;", "⪤" +"gnE;", "≩" +"gnap;", "⪊" +"gnapprox;", "⪊" +"gne;", "⪈" +"gneq;", "⪈" +"gneqq;", "≩" +"gnsim;", "⋧" +"gopf;", "𝕘" +"grave;", "`" +"gscr;", "ℊ" +"gsim;", "≳" +"gsime;", "⪎" +"gsiml;", "⪐" +"gt", ">" +"gt;", ">" +"gtcc;", "⪧" +"gtcir;", "⩺" +"gtdot;", "⋗" +"gtlPar;", "⦕" +"gtquest;", "⩼" +"gtrapprox;", "⪆" +"gtrarr;", "⥸" +"gtrdot;", "⋗" +"gtreqless;", "⋛" +"gtreqqless;", "⪌" +"gtrless;", "≷" +"gtrsim;", "≳" +"gvertneqq;", "≩︀" +"gvnE;", "≩︀" +"hArr;", "⇔" +"hairsp;", " " +"half;", "½" +"hamilt;", "ℋ" +"hardcy;", "ъ" +"harr;", "↔" +"harrcir;", "⥈" +"harrw;", "↭" +"hbar;", "ℏ" +"hcirc;", "ĥ" +"hearts;", "♥" +"heartsuit;", "♥" +"hellip;", "…" +"hercon;", "⊹" +"hfr;", "𝔥" +"hksearow;", "⤥" 
+"hkswarow;", "⤦" +"hoarr;", "⇿" +"homtht;", "∻" +"hookleftarrow;", "↩" +"hookrightarrow;", "↪" +"hopf;", "𝕙" +"horbar;", "―" +"hscr;", "𝒽" +"hslash;", "ℏ" +"hstrok;", "ħ" +"hybull;", "⁃" +"hyphen;", "‐" +"iacute", "í" +"iacute;", "í" +"ic;", "⁣" +"icirc", "î" +"icirc;", "î" +"icy;", "и" +"iecy;", "е" +"iexcl", "¡" +"iexcl;", "¡" +"iff;", "⇔" +"ifr;", "𝔦" +"igrave", "ì" +"igrave;", "ì" +"ii;", "ⅈ" +"iiiint;", "⨌" +"iiint;", "∭" +"iinfin;", "⧜" +"iiota;", "℩" +"ijlig;", "ij" +"imacr;", "ī" +"image;", "ℑ" +"imagline;", "ℐ" +"imagpart;", "ℑ" +"imath;", "ı" +"imof;", "⊷" +"imped;", "Ƶ" +"in;", "∈" +"incare;", "℅" +"infin;", "∞" +"infintie;", "⧝" +"inodot;", "ı" +"int;", "∫" +"intcal;", "⊺" +"integers;", "ℤ" +"intercal;", "⊺" +"intlarhk;", "⨗" +"intprod;", "⨼" +"iocy;", "ё" +"iogon;", "į" +"iopf;", "𝕚" +"iota;", "ι" +"iprod;", "⨼" +"iquest", "¿" +"iquest;", "¿" +"iscr;", "𝒾" +"isin;", "∈" +"isinE;", "⋹" +"isindot;", "⋵" +"isins;", "⋴" +"isinsv;", "⋳" +"isinv;", "∈" +"it;", "⁢" +"itilde;", "ĩ" +"iukcy;", "і" +"iuml", "ï" +"iuml;", "ï" +"jcirc;", "ĵ" +"jcy;", "й" +"jfr;", "𝔧" +"jmath;", "ȷ" +"jopf;", "𝕛" +"jscr;", "𝒿" +"jsercy;", "ј" +"jukcy;", "є" +"kappa;", "κ" +"kappav;", "ϰ" +"kcedil;", "ķ" +"kcy;", "к" +"kfr;", "𝔨" +"kgreen;", "ĸ" +"khcy;", "х" +"kjcy;", "ќ" +"kopf;", "𝕜" +"kscr;", "𝓀" +"lAarr;", "⇚" +"lArr;", "⇐" +"lAtail;", "⤛" +"lBarr;", "⤎" +"lE;", "≦" +"lEg;", "⪋" +"lHar;", "⥢" +"lacute;", "ĺ" +"laemptyv;", "⦴" +"lagran;", "ℒ" +"lambda;", "λ" +"lang;", "⟨" +"langd;", "⦑" +"langle;", "⟨" +"lap;", "⪅" +"laquo", "«" +"laquo;", "«" +"larr;", "←" +"larrb;", "⇤" +"larrbfs;", "⤟" +"larrfs;", "⤝" +"larrhk;", "↩" +"larrlp;", "↫" +"larrpl;", "⤹" +"larrsim;", "⥳" +"larrtl;", "↢" +"lat;", "⪫" +"latail;", "⤙" +"late;", "⪭" +"lates;", "⪭︀" +"lbarr;", "⤌" +"lbbrk;", "❲" +"lbrace;", "{" +"lbrack;", "[" +"lbrke;", "⦋" +"lbrksld;", "⦏" +"lbrkslu;", "⦍" +"lcaron;", "ľ" +"lcedil;", "ļ" +"lceil;", "⌈" +"lcub;", "{" +"lcy;", "л" +"ldca;", "⤶" +"ldquo;", "“" +"ldquor;", "„" +"ldrdhar;", "⥧" +"ldrushar;", "⥋" +"ldsh;", "↲" +"le;", "≤" +"leftarrow;", "←" +"leftarrowtail;", "↢" +"leftharpoondown;", "↽" +"leftharpoonup;", "↼" +"leftleftarrows;", "⇇" +"leftrightarrow;", "↔" +"leftrightarrows;", "⇆" +"leftrightharpoons;", "⇋" +"leftrightsquigarrow;", "↭" +"leftthreetimes;", "⋋" +"leg;", "⋚" +"leq;", "≤" +"leqq;", "≦" +"leqslant;", "⩽" +"les;", "⩽" +"lescc;", "⪨" +"lesdot;", "⩿" +"lesdoto;", "⪁" +"lesdotor;", "⪃" +"lesg;", "⋚︀" +"lesges;", "⪓" +"lessapprox;", "⪅" +"lessdot;", "⋖" +"lesseqgtr;", "⋚" +"lesseqqgtr;", "⪋" +"lessgtr;", "≶" +"lesssim;", "≲" +"lfisht;", "⥼" +"lfloor;", "⌊" +"lfr;", "𝔩" +"lg;", "≶" +"lgE;", "⪑" +"lhard;", "↽" +"lharu;", "↼" +"lharul;", "⥪" +"lhblk;", "▄" +"ljcy;", "љ" +"ll;", "≪" +"llarr;", "⇇" +"llcorner;", "⌞" +"llhard;", "⥫" +"lltri;", "◺" +"lmidot;", "ŀ" +"lmoust;", "⎰" +"lmoustache;", "⎰" +"lnE;", "≨" +"lnap;", "⪉" +"lnapprox;", "⪉" +"lne;", "⪇" +"lneq;", "⪇" +"lneqq;", "≨" +"lnsim;", "⋦" +"loang;", "⟬" +"loarr;", "⇽" +"lobrk;", "⟦" +"longleftarrow;", "⟵" +"longleftrightarrow;", "⟷" +"longmapsto;", "⟼" +"longrightarrow;", "⟶" +"looparrowleft;", "↫" +"looparrowright;", "↬" +"lopar;", "⦅" +"lopf;", "𝕝" +"loplus;", "⨭" +"lotimes;", "⨴" +"lowast;", "∗" +"lowbar;", "_" +"loz;", "◊" +"lozenge;", "◊" +"lozf;", "⧫" +"lpar;", "(" +"lparlt;", "⦓" +"lrarr;", "⇆" +"lrcorner;", "⌟" +"lrhar;", "⇋" +"lrhard;", "⥭" +"lrm;", "‎" +"lrtri;", "⊿" +"lsaquo;", "‹" +"lscr;", "𝓁" +"lsh;", "↰" +"lsim;", "≲" +"lsime;", "⪍" +"lsimg;", "⪏" +"lsqb;", "[" +"lsquo;", "‘" +"lsquor;", "‚" +"lstrok;", "ł" +"lt", "<" 
+"lt;", "<" +"ltcc;", "⪦" +"ltcir;", "⩹" +"ltdot;", "⋖" +"lthree;", "⋋" +"ltimes;", "⋉" +"ltlarr;", "⥶" +"ltquest;", "⩻" +"ltrPar;", "⦖" +"ltri;", "◃" +"ltrie;", "⊴" +"ltrif;", "◂" +"lurdshar;", "⥊" +"luruhar;", "⥦" +"lvertneqq;", "≨︀" +"lvnE;", "≨︀" +"mDDot;", "∺" +"macr", "¯" +"macr;", "¯" +"male;", "♂" +"malt;", "✠" +"maltese;", "✠" +"map;", "↦" +"mapsto;", "↦" +"mapstodown;", "↧" +"mapstoleft;", "↤" +"mapstoup;", "↥" +"marker;", "▮" +"mcomma;", "⨩" +"mcy;", "м" +"mdash;", "—" +"measuredangle;", "∡" +"mfr;", "𝔪" +"mho;", "℧" +"micro", "µ" +"micro;", "µ" +"mid;", "∣" +"midast;", "*" +"midcir;", "⫰" +"middot", "·" +"middot;", "·" +"minus;", "−" +"minusb;", "⊟" +"minusd;", "∸" +"minusdu;", "⨪" +"mlcp;", "⫛" +"mldr;", "…" +"mnplus;", "∓" +"models;", "⊧" +"mopf;", "𝕞" +"mp;", "∓" +"mscr;", "𝓂" +"mstpos;", "∾" +"mu;", "μ" +"multimap;", "⊸" +"mumap;", "⊸" +"nGg;", "⋙̸" +"nGt;", "≫⃒" +"nGtv;", "≫̸" +"nLeftarrow;", "⇍" +"nLeftrightarrow;", "⇎" +"nLl;", "⋘̸" +"nLt;", "≪⃒" +"nLtv;", "≪̸" +"nRightarrow;", "⇏" +"nVDash;", "⊯" +"nVdash;", "⊮" +"nabla;", "∇" +"nacute;", "ń" +"nang;", "∠⃒" +"nap;", "≉" +"napE;", "⩰̸" +"napid;", "≋̸" +"napos;", "ʼn" +"napprox;", "≉" +"natur;", "♮" +"natural;", "♮" +"naturals;", "ℕ" +"nbsp", " " +"nbsp;", " " +"nbump;", "≎̸" +"nbumpe;", "≏̸" +"ncap;", "⩃" +"ncaron;", "ň" +"ncedil;", "ņ" +"ncong;", "≇" +"ncongdot;", "⩭̸" +"ncup;", "⩂" +"ncy;", "н" +"ndash;", "–" +"ne;", "≠" +"neArr;", "⇗" +"nearhk;", "⤤" +"nearr;", "↗" +"nearrow;", "↗" +"nedot;", "≐̸" +"nequiv;", "≢" +"nesear;", "⤨" +"nesim;", "≂̸" +"nexist;", "∄" +"nexists;", "∄" +"nfr;", "𝔫" +"ngE;", "≧̸" +"nge;", "≱" +"ngeq;", "≱" +"ngeqq;", "≧̸" +"ngeqslant;", "⩾̸" +"nges;", "⩾̸" +"ngsim;", "≵" +"ngt;", "≯" +"ngtr;", "≯" +"nhArr;", "⇎" +"nharr;", "↮" +"nhpar;", "⫲" +"ni;", "∋" +"nis;", "⋼" +"nisd;", "⋺" +"niv;", "∋" +"njcy;", "њ" +"nlArr;", "⇍" +"nlE;", "≦̸" +"nlarr;", "↚" +"nldr;", "‥" +"nle;", "≰" +"nleftarrow;", "↚" +"nleftrightarrow;", "↮" +"nleq;", "≰" +"nleqq;", "≦̸" +"nleqslant;", "⩽̸" +"nles;", "⩽̸" +"nless;", "≮" +"nlsim;", "≴" +"nlt;", "≮" +"nltri;", "⋪" +"nltrie;", "⋬" +"nmid;", "∤" +"nopf;", "𝕟" +"not", "¬" +"not;", "¬" +"notin;", "∉" +"notinE;", "⋹̸" +"notindot;", "⋵̸" +"notinva;", "∉" +"notinvb;", "⋷" +"notinvc;", "⋶" +"notni;", "∌" +"notniva;", "∌" +"notnivb;", "⋾" +"notnivc;", "⋽" +"npar;", "∦" +"nparallel;", "∦" +"nparsl;", "⫽⃥" +"npart;", "∂̸" +"npolint;", "⨔" +"npr;", "⊀" +"nprcue;", "⋠" +"npre;", "⪯̸" +"nprec;", "⊀" +"npreceq;", "⪯̸" +"nrArr;", "⇏" +"nrarr;", "↛" +"nrarrc;", "⤳̸" +"nrarrw;", "↝̸" +"nrightarrow;", "↛" +"nrtri;", "⋫" +"nrtrie;", "⋭" +"nsc;", "⊁" +"nsccue;", "⋡" +"nsce;", "⪰̸" +"nscr;", "𝓃" +"nshortmid;", "∤" +"nshortparallel;", "∦" +"nsim;", "≁" +"nsime;", "≄" +"nsimeq;", "≄" +"nsmid;", "∤" +"nspar;", "∦" +"nsqsube;", "⋢" +"nsqsupe;", "⋣" +"nsub;", "⊄" +"nsubE;", "⫅̸" +"nsube;", "⊈" +"nsubset;", "⊂⃒" +"nsubseteq;", "⊈" +"nsubseteqq;", "⫅̸" +"nsucc;", "⊁" +"nsucceq;", "⪰̸" +"nsup;", "⊅" +"nsupE;", "⫆̸" +"nsupe;", "⊉" +"nsupset;", "⊃⃒" +"nsupseteq;", "⊉" +"nsupseteqq;", "⫆̸" +"ntgl;", "≹" +"ntilde", "ñ" +"ntilde;", "ñ" +"ntlg;", "≸" +"ntriangleleft;", "⋪" +"ntrianglelefteq;", "⋬" +"ntriangleright;", "⋫" +"ntrianglerighteq;", "⋭" +"nu;", "ν" +"num;", "#" +"numero;", "№" +"numsp;", " " +"nvDash;", "⊭" +"nvHarr;", "⤄" +"nvap;", "≍⃒" +"nvdash;", "⊬" +"nvge;", "≥⃒" +"nvgt;", ">⃒" +"nvinfin;", "⧞" +"nvlArr;", "⤂" +"nvle;", "≤⃒" +"nvlt;", "<⃒" +"nvltrie;", "⊴⃒" +"nvrArr;", "⤃" +"nvrtrie;", "⊵⃒" +"nvsim;", "∼⃒" +"nwArr;", "⇖" +"nwarhk;", "⤣" +"nwarr;", "↖" +"nwarrow;", "↖" +"nwnear;", "⤧" 
+"oS;", "Ⓢ" +"oacute", "ó" +"oacute;", "ó" +"oast;", "⊛" +"ocir;", "⊚" +"ocirc", "ô" +"ocirc;", "ô" +"ocy;", "о" +"odash;", "⊝" +"odblac;", "ő" +"odiv;", "⨸" +"odot;", "⊙" +"odsold;", "⦼" +"oelig;", "œ" +"ofcir;", "⦿" +"ofr;", "𝔬" +"ogon;", "˛" +"ograve", "ò" +"ograve;", "ò" +"ogt;", "⧁" +"ohbar;", "⦵" +"ohm;", "Ω" +"oint;", "∮" +"olarr;", "↺" +"olcir;", "⦾" +"olcross;", "⦻" +"oline;", "‾" +"olt;", "⧀" +"omacr;", "ō" +"omega;", "ω" +"omicron;", "ο" +"omid;", "⦶" +"ominus;", "⊖" +"oopf;", "𝕠" +"opar;", "⦷" +"operp;", "⦹" +"oplus;", "⊕" +"or;", "∨" +"orarr;", "↻" +"ord;", "⩝" +"order;", "ℴ" +"orderof;", "ℴ" +"ordf", "ª" +"ordf;", "ª" +"ordm", "º" +"ordm;", "º" +"origof;", "⊶" +"oror;", "⩖" +"orslope;", "⩗" +"orv;", "⩛" +"oscr;", "ℴ" +"oslash", "ø" +"oslash;", "ø" +"osol;", "⊘" +"otilde", "õ" +"otilde;", "õ" +"otimes;", "⊗" +"otimesas;", "⨶" +"ouml", "ö" +"ouml;", "ö" +"ovbar;", "⌽" +"par;", "∥" +"para", "¶" +"para;", "¶" +"parallel;", "∥" +"parsim;", "⫳" +"parsl;", "⫽" +"part;", "∂" +"pcy;", "п" +"percnt;", "%" +"period;", "." +"permil;", "‰" +"perp;", "⊥" +"pertenk;", "‱" +"pfr;", "𝔭" +"phi;", "φ" +"phiv;", "ϕ" +"phmmat;", "ℳ" +"phone;", "☎" +"pi;", "π" +"pitchfork;", "⋔" +"piv;", "ϖ" +"planck;", "ℏ" +"planckh;", "ℎ" +"plankv;", "ℏ" +"plus;", "+" +"plusacir;", "⨣" +"plusb;", "⊞" +"pluscir;", "⨢" +"plusdo;", "∔" +"plusdu;", "⨥" +"pluse;", "⩲" +"plusmn", "±" +"plusmn;", "±" +"plussim;", "⨦" +"plustwo;", "⨧" +"pm;", "±" +"pointint;", "⨕" +"popf;", "𝕡" +"pound", "£" +"pound;", "£" +"pr;", "≺" +"prE;", "⪳" +"prap;", "⪷" +"prcue;", "≼" +"pre;", "⪯" +"prec;", "≺" +"precapprox;", "⪷" +"preccurlyeq;", "≼" +"preceq;", "⪯" +"precnapprox;", "⪹" +"precneqq;", "⪵" +"precnsim;", "⋨" +"precsim;", "≾" +"prime;", "′" +"primes;", "ℙ" +"prnE;", "⪵" +"prnap;", "⪹" +"prnsim;", "⋨" +"prod;", "∏" +"profalar;", "⌮" +"profline;", "⌒" +"profsurf;", "⌓" +"prop;", "∝" +"propto;", "∝" +"prsim;", "≾" +"prurel;", "⊰" +"pscr;", "𝓅" +"psi;", "ψ" +"puncsp;", " " +"qfr;", "𝔮" +"qint;", "⨌" +"qopf;", "𝕢" +"qprime;", "⁗" +"qscr;", "𝓆" +"quaternions;", "ℍ" +"quatint;", "⨖" +"quest;", "?" 
+"questeq;", "≟" +"quot", "\"" +"quot;", "\"" +"rAarr;", "⇛" +"rArr;", "⇒" +"rAtail;", "⤜" +"rBarr;", "⤏" +"rHar;", "⥤" +"race;", "∽̱" +"racute;", "ŕ" +"radic;", "√" +"raemptyv;", "⦳" +"rang;", "⟩" +"rangd;", "⦒" +"range;", "⦥" +"rangle;", "⟩" +"raquo", "»" +"raquo;", "»" +"rarr;", "→" +"rarrap;", "⥵" +"rarrb;", "⇥" +"rarrbfs;", "⤠" +"rarrc;", "⤳" +"rarrfs;", "⤞" +"rarrhk;", "↪" +"rarrlp;", "↬" +"rarrpl;", "⥅" +"rarrsim;", "⥴" +"rarrtl;", "↣" +"rarrw;", "↝" +"ratail;", "⤚" +"ratio;", "∶" +"rationals;", "ℚ" +"rbarr;", "⤍" +"rbbrk;", "❳" +"rbrace;", "}" +"rbrack;", "]" +"rbrke;", "⦌" +"rbrksld;", "⦎" +"rbrkslu;", "⦐" +"rcaron;", "ř" +"rcedil;", "ŗ" +"rceil;", "⌉" +"rcub;", "}" +"rcy;", "р" +"rdca;", "⤷" +"rdldhar;", "⥩" +"rdquo;", "”" +"rdquor;", "”" +"rdsh;", "↳" +"real;", "ℜ" +"realine;", "ℛ" +"realpart;", "ℜ" +"reals;", "ℝ" +"rect;", "▭" +"reg", "®" +"reg;", "®" +"rfisht;", "⥽" +"rfloor;", "⌋" +"rfr;", "𝔯" +"rhard;", "⇁" +"rharu;", "⇀" +"rharul;", "⥬" +"rho;", "ρ" +"rhov;", "ϱ" +"rightarrow;", "→" +"rightarrowtail;", "↣" +"rightharpoondown;", "⇁" +"rightharpoonup;", "⇀" +"rightleftarrows;", "⇄" +"rightleftharpoons;", "⇌" +"rightrightarrows;", "⇉" +"rightsquigarrow;", "↝" +"rightthreetimes;", "⋌" +"ring;", "˚" +"risingdotseq;", "≓" +"rlarr;", "⇄" +"rlhar;", "⇌" +"rlm;", "‏" +"rmoust;", "⎱" +"rmoustache;", "⎱" +"rnmid;", "⫮" +"roang;", "⟭" +"roarr;", "⇾" +"robrk;", "⟧" +"ropar;", "⦆" +"ropf;", "𝕣" +"roplus;", "⨮" +"rotimes;", "⨵" +"rpar;", ")" +"rpargt;", "⦔" +"rppolint;", "⨒" +"rrarr;", "⇉" +"rsaquo;", "›" +"rscr;", "𝓇" +"rsh;", "↱" +"rsqb;", "]" +"rsquo;", "’" +"rsquor;", "’" +"rthree;", "⋌" +"rtimes;", "⋊" +"rtri;", "▹" +"rtrie;", "⊵" +"rtrif;", "▸" +"rtriltri;", "⧎" +"ruluhar;", "⥨" +"rx;", "℞" +"sacute;", "ś" +"sbquo;", "‚" +"sc;", "≻" +"scE;", "⪴" +"scap;", "⪸" +"scaron;", "š" +"sccue;", "≽" +"sce;", "⪰" +"scedil;", "ş" +"scirc;", "ŝ" +"scnE;", "⪶" +"scnap;", "⪺" +"scnsim;", "⋩" +"scpolint;", "⨓" +"scsim;", "≿" +"scy;", "с" +"sdot;", "⋅" +"sdotb;", "⊡" +"sdote;", "⩦" +"seArr;", "⇘" +"searhk;", "⤥" +"searr;", "↘" +"searrow;", "↘" +"sect", "§" +"sect;", "§" +"semi;", ";" +"seswar;", "⤩" +"setminus;", "∖" +"setmn;", "∖" +"sext;", "✶" +"sfr;", "𝔰" +"sfrown;", "⌢" +"sharp;", "♯" +"shchcy;", "щ" +"shcy;", "ш" +"shortmid;", "∣" +"shortparallel;", "∥" +"shy", "­" +"shy;", "­" +"sigma;", "σ" +"sigmaf;", "ς" +"sigmav;", "ς" +"sim;", "∼" +"simdot;", "⩪" +"sime;", "≃" +"simeq;", "≃" +"simg;", "⪞" +"simgE;", "⪠" +"siml;", "⪝" +"simlE;", "⪟" +"simne;", "≆" +"simplus;", "⨤" +"simrarr;", "⥲" +"slarr;", "←" +"smallsetminus;", "∖" +"smashp;", "⨳" +"smeparsl;", "⧤" +"smid;", "∣" +"smile;", "⌣" +"smt;", "⪪" +"smte;", "⪬" +"smtes;", "⪬︀" +"softcy;", "ь" +"sol;", "/" +"solb;", "⧄" +"solbar;", "⌿" +"sopf;", "𝕤" +"spades;", "♠" +"spadesuit;", "♠" +"spar;", "∥" +"sqcap;", "⊓" +"sqcaps;", "⊓︀" +"sqcup;", "⊔" +"sqcups;", "⊔︀" +"sqsub;", "⊏" +"sqsube;", "⊑" +"sqsubset;", "⊏" +"sqsubseteq;", "⊑" +"sqsup;", "⊐" +"sqsupe;", "⊒" +"sqsupset;", "⊐" +"sqsupseteq;", "⊒" +"squ;", "□" +"square;", "□" +"squarf;", "▪" +"squf;", "▪" +"srarr;", "→" +"sscr;", "𝓈" +"ssetmn;", "∖" +"ssmile;", "⌣" +"sstarf;", "⋆" +"star;", "☆" +"starf;", "★" +"straightepsilon;", "ϵ" +"straightphi;", "ϕ" +"strns;", "¯" +"sub;", "⊂" +"subE;", "⫅" +"subdot;", "⪽" +"sube;", "⊆" +"subedot;", "⫃" +"submult;", "⫁" +"subnE;", "⫋" +"subne;", "⊊" +"subplus;", "⪿" +"subrarr;", "⥹" +"subset;", "⊂" +"subseteq;", "⊆" +"subseteqq;", "⫅" +"subsetneq;", "⊊" +"subsetneqq;", "⫋" +"subsim;", "⫇" +"subsub;", "⫕" +"subsup;", "⫓" +"succ;", "≻" +"succapprox;", "⪸" 
+"succcurlyeq;", "≽" +"succeq;", "⪰" +"succnapprox;", "⪺" +"succneqq;", "⪶" +"succnsim;", "⋩" +"succsim;", "≿" +"sum;", "∑" +"sung;", "♪" +"sup1", "¹" +"sup1;", "¹" +"sup2", "²" +"sup2;", "²" +"sup3", "³" +"sup3;", "³" +"sup;", "⊃" +"supE;", "⫆" +"supdot;", "⪾" +"supdsub;", "⫘" +"supe;", "⊇" +"supedot;", "⫄" +"suphsol;", "⟉" +"suphsub;", "⫗" +"suplarr;", "⥻" +"supmult;", "⫂" +"supnE;", "⫌" +"supne;", "⊋" +"supplus;", "⫀" +"supset;", "⊃" +"supseteq;", "⊇" +"supseteqq;", "⫆" +"supsetneq;", "⊋" +"supsetneqq;", "⫌" +"supsim;", "⫈" +"supsub;", "⫔" +"supsup;", "⫖" +"swArr;", "⇙" +"swarhk;", "⤦" +"swarr;", "↙" +"swarrow;", "↙" +"swnwar;", "⤪" +"szlig", "ß" +"szlig;", "ß" +"target;", "⌖" +"tau;", "τ" +"tbrk;", "⎴" +"tcaron;", "ť" +"tcedil;", "ţ" +"tcy;", "т" +"tdot;", "⃛" +"telrec;", "⌕" +"tfr;", "𝔱" +"there4;", "∴" +"therefore;", "∴" +"theta;", "θ" +"thetasym;", "ϑ" +"thetav;", "ϑ" +"thickapprox;", "≈" +"thicksim;", "∼" +"thinsp;", " " +"thkap;", "≈" +"thksim;", "∼" +"thorn", "þ" +"thorn;", "þ" +"tilde;", "˜" +"times", "×" +"times;", "×" +"timesb;", "⊠" +"timesbar;", "⨱" +"timesd;", "⨰" +"tint;", "∭" +"toea;", "⤨" +"top;", "⊤" +"topbot;", "⌶" +"topcir;", "⫱" +"topf;", "𝕥" +"topfork;", "⫚" +"tosa;", "⤩" +"tprime;", "‴" +"trade;", "™" +"triangle;", "▵" +"triangledown;", "▿" +"triangleleft;", "◃" +"trianglelefteq;", "⊴" +"triangleq;", "≜" +"triangleright;", "▹" +"trianglerighteq;", "⊵" +"tridot;", "◬" +"trie;", "≜" +"triminus;", "⨺" +"triplus;", "⨹" +"trisb;", "⧍" +"tritime;", "⨻" +"trpezium;", "⏢" +"tscr;", "𝓉" +"tscy;", "ц" +"tshcy;", "ћ" +"tstrok;", "ŧ" +"twixt;", "≬" +"twoheadleftarrow;", "↞" +"twoheadrightarrow;", "↠" +"uArr;", "⇑" +"uHar;", "⥣" +"uacute", "ú" +"uacute;", "ú" +"uarr;", "↑" +"ubrcy;", "ў" +"ubreve;", "ŭ" +"ucirc", "û" +"ucirc;", "û" +"ucy;", "у" +"udarr;", "⇅" +"udblac;", "ű" +"udhar;", "⥮" +"ufisht;", "⥾" +"ufr;", "𝔲" +"ugrave", "ù" +"ugrave;", "ù" +"uharl;", "↿" +"uharr;", "↾" +"uhblk;", "▀" +"ulcorn;", "⌜" +"ulcorner;", "⌜" +"ulcrop;", "⌏" +"ultri;", "◸" +"umacr;", "ū" +"uml", "¨" +"uml;", "¨" +"uogon;", "ų" +"uopf;", "𝕦" +"uparrow;", "↑" +"updownarrow;", "↕" +"upharpoonleft;", "↿" +"upharpoonright;", "↾" +"uplus;", "⊎" +"upsi;", "υ" +"upsih;", "ϒ" +"upsilon;", "υ" +"upuparrows;", "⇈" +"urcorn;", "⌝" +"urcorner;", "⌝" +"urcrop;", "⌎" +"uring;", "ů" +"urtri;", "◹" +"uscr;", "𝓊" +"utdot;", "⋰" +"utilde;", "ũ" +"utri;", "▵" +"utrif;", "▴" +"uuarr;", "⇈" +"uuml", "ü" +"uuml;", "ü" +"uwangle;", "⦧" +"vArr;", "⇕" +"vBar;", "⫨" +"vBarv;", "⫩" +"vDash;", "⊨" +"vangrt;", "⦜" +"varepsilon;", "ϵ" +"varkappa;", "ϰ" +"varnothing;", "∅" +"varphi;", "ϕ" +"varpi;", "ϖ" +"varpropto;", "∝" +"varr;", "↕" +"varrho;", "ϱ" +"varsigma;", "ς" +"varsubsetneq;", "⊊︀" +"varsubsetneqq;", "⫋︀" +"varsupsetneq;", "⊋︀" +"varsupsetneqq;", "⫌︀" +"vartheta;", "ϑ" +"vartriangleleft;", "⊲" +"vartriangleright;", "⊳" +"vcy;", "в" +"vdash;", "⊢" +"vee;", "∨" +"veebar;", "⊻" +"veeeq;", "≚" +"vellip;", "⋮" +"verbar;", "|" +"vert;", "|" +"vfr;", "𝔳" +"vltri;", "⊲" +"vnsub;", "⊂⃒" +"vnsup;", "⊃⃒" +"vopf;", "𝕧" +"vprop;", "∝" +"vrtri;", "⊳" +"vscr;", "𝓋" +"vsubnE;", "⫋︀" +"vsubne;", "⊊︀" +"vsupnE;", "⫌︀" +"vsupne;", "⊋︀" +"vzigzag;", "⦚" +"wcirc;", "ŵ" +"wedbar;", "⩟" +"wedge;", "∧" +"wedgeq;", "≙" +"weierp;", "℘" +"wfr;", "𝔴" +"wopf;", "𝕨" +"wp;", "℘" +"wr;", "≀" +"wreath;", "≀" +"wscr;", "𝓌" +"xcap;", "⋂" +"xcirc;", "◯" +"xcup;", "⋃" +"xdtri;", "▽" +"xfr;", "𝔵" +"xhArr;", "⟺" +"xharr;", "⟷" +"xi;", "ξ" +"xlArr;", "⟸" +"xlarr;", "⟵" +"xmap;", "⟼" +"xnis;", "⋻" +"xodot;", "⨀" +"xopf;", "𝕩" +"xoplus;", "⨁" +"xotime;", 
"⨂" +"xrArr;", "⟹" +"xrarr;", "⟶" +"xscr;", "𝓍" +"xsqcup;", "⨆" +"xuplus;", "⨄" +"xutri;", "△" +"xvee;", "⋁" +"xwedge;", "⋀" +"yacute", "ý" +"yacute;", "ý" +"yacy;", "я" +"ycirc;", "ŷ" +"ycy;", "ы" +"yen", "¥" +"yen;", "¥" +"yfr;", "𝔶" +"yicy;", "ї" +"yopf;", "𝕪" +"yscr;", "𝓎" +"yucy;", "ю" +"yuml", "ÿ" +"yuml;", "ÿ" +"zacute;", "ź" +"zcaron;", "ž" +"zcy;", "з" +"zdot;", "ż" +"zeetrf;", "ℨ" +"zeta;", "ζ" +"zfr;", "𝔷" +"zhcy;", "ж" +"zigrarr;", "⇝" +"zopf;", "𝕫" +"zscr;", "𝓏" +"zwj;", "‍" +"zwnj;", "‌" +%% +// NOLINTEND(google-runtime-int,hicpp-use-nullptr,modernize-use-nullptr) diff --git a/src/Functions/HTMLCharacterReference.h b/src/Functions/HTMLCharacterReference.h new file mode 100644 index 00000000000..9db873939d7 --- /dev/null +++ b/src/Functions/HTMLCharacterReference.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +// Definition of the class generated by gperf +class HTMLCharacterHash +{ +private: + static inline unsigned int hash(const char * str, size_t len); + +public: + static const struct NameAndGlyph * Lookup(const char * str, size_t len); +}; + +// Definition of the struct generated by gperf +struct NameAndGlyph +{ + const char * name; + const char * glyph; +}; diff --git a/src/Functions/HTMLCharacterReference.sh b/src/Functions/HTMLCharacterReference.sh new file mode 100755 index 00000000000..c7b728c5980 --- /dev/null +++ b/src/Functions/HTMLCharacterReference.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +echo '%language=C++ +%define class-name HTMLCharacterHash +%define lookup-function-name Lookup +%readonly-tables +%includes +%compare-strncmp +%{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma GCC diagnostic ignored "-Wunused-macros" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" +// NOLINTBEGIN(google-runtime-int,hicpp-use-nullptr,modernize-use-nullptr) +%} +struct NameAndGlyph { +const char *name; +const char *glyph; +}; +%%' > HTMLCharacterReference.gperf + +# character reference as available at https://html.spec.whatwg.org/multipage/named-characters.html +curl -X GET https://html.spec.whatwg.org/entities.json | jq -r 'keys[] as $k | "\"\($k)\", \(.[$k] | .characters|tojson)"' | sed 's/^"&/"/' >> HTMLCharacterReference.gperf +echo '%%' >> HTMLCharacterReference.gperf +echo '// NOLINTEND(google-runtime-int,hicpp-use-nullptr,modernize-use-nullptr)' >> HTMLCharacterReference.gperf diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index ab6b6399486..a4ff49859cc 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -13,6 +13,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. 
@@ -39,9 +40,6 @@ struct HasTokenImpl if (start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); - if (pattern.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token"); - if (haystack_offsets.empty()) return; @@ -49,7 +47,7 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - if (!std::none_of(pattern.begin(), pattern.end(), isTokenSeparator)) + if (const auto has_separator = std::any_of(pattern.cbegin(), pattern.cend(), isTokenSeparator); has_separator || pattern.empty()) { if (res_null) { @@ -57,8 +55,12 @@ struct HasTokenImpl std::ranges::fill(res_null->getData(), true); return; } - else + else if (has_separator) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); + else if (pattern.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token"); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected internal state"); } size_t pattern_size = pattern.size(); diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 650b54d9a37..d119b15733b 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -361,7 +361,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, /// If default of sparse column is changed after execution of function, convert to full column. /// If there are any default in non-zero position after execution of function, convert to full column. /// Currently there is no easy way to rebuild sparse column with new offsets. - if (!result_type->supportsSparseSerialization() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1) + if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1) { const auto & offsets_data = assert_cast &>(*sparse_offsets).getData(); return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1); diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index 0c54901579e..8ea53c865ce 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -60,7 +60,7 @@ struct IsOperation static constexpr bool bit_hamming_distance = IsSameOperation::value; - static constexpr bool division = div_floating || div_int || div_int_or_zero; + static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo; static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest; }; diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index 709ef89dd3c..141f25bfe4c 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -58,6 +58,10 @@ bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & member_name = std::make_shared(String(last_begin, pos->end)); ++pos; } + else if (!pos.isValid() && pos->type == TokenType::EndOfStream) + { + member_name = std::make_shared(String(last_begin, last_end)); + } else { return false; diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 9aae42cce41..9c28e349413 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -635,9 +635,7 @@ UInt128 
sipHash128(Polygon && polygon) for (auto & inner : inners) hash_ring(inner); - UInt128 res; - hash.get128(res); - return res; + return hash.get128(); } } diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index fcee2753066..896e9d8ca48 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -88,8 +88,9 @@ public: } } - template >> - inline auto execute(const T & t, Args && ... args) const + template + requires(!std::same_as) + inline auto execute(const T & t, Args &&... args) const { return wrapped_transform.execute(t, std::forward(args)...); } @@ -128,7 +129,8 @@ public: } } - template >> + template + requires (!std::same_as) inline auto executeExtendedResult(const T & t, Args && ... args) const { return wrapped_transform.executeExtendedResult(t, std::forward(args)...); diff --git a/src/Functions/URL/CMakeLists.txt b/src/Functions/URL/CMakeLists.txt index 0e148e87604..ce9a430423b 100644 --- a/src/Functions/URL/CMakeLists.txt +++ b/src/Functions/URL/CMakeLists.txt @@ -22,6 +22,11 @@ if (USE_GPERF) BYPRODUCTS "${CMAKE_CURRENT_SOURCE_DIR}/tldLookup.gperf" ) add_custom_target(generate-tldlookup ${GPERF} tldLookup.gperf --output-file=tldLookup.generated.cpp && clang-format -i tldLookup.generated.cpp + # apply these clang-tidy fixes to the file generated by gperf: + # - use cstring since string.h is deprecated + # - return nullptr instead of 0 + && sed -i -e 's/\#include /\#include /g' tldLookup.generated.cpp -e 's/return 0\;/return nullptr\;/g' tldLookup.generated.cpp + SOURCES tldLookup.gperf WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} # BYPRODUCTS "${CMAKE_CURRENT_SOURCE_DIR}/tldLookup.generated.cpp" diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 91f83a457be..f38f106e9a2 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -44,6 +44,7 @@ inline std::string_view getURLHostRFC(const char * data, size_t size) case '.': case '-': case '+': + case '[': break; case ' ': /// restricted symbols case '\t': @@ -56,7 +57,6 @@ inline std::string_view getURLHostRFC(const char * data, size_t size) case '\\': case '^': case '~': - case '[': case ']': case ';': case '=': @@ -73,6 +73,13 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos pos = data; } + bool has_open_bracket = false; + bool has_end_bracket = false; + if (*pos == '[') /// IPv6 [2001:db8::1]:80 + { + has_open_bracket = true; + ++pos; + } Pos dot_pos = nullptr; Pos colon_pos = nullptr; bool has_sub_delims = false; @@ -84,10 +91,14 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos switch (*pos) { case '.': + if (has_open_bracket) + return std::string_view{}; if (has_at_symbol || colon_pos == nullptr) dot_pos = pos; break; case ':': + if (has_open_bracket) + continue; if (has_at_symbol || colon_pos) goto done; colon_pos = pos; break; @@ -116,6 +127,13 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos /// registered). 
has_sub_delims = true; continue; + case ']': + if (has_open_bracket) + { + has_end_bracket = true; + goto done; + } + [[fallthrough]]; case ' ': /// restricted symbols in whole URL case '\t': case '<': @@ -126,7 +144,6 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos case '\\': case '^': case '[': - case ']': if (colon_pos == nullptr) return std::string_view{}; else @@ -138,7 +155,11 @@ done: if (has_sub_delims) return std::string_view{}; if (!has_at_symbol) + { + if (has_open_bracket && has_end_bracket) + return std::string_view(start_of_host, pos - start_of_host); pos = colon_pos ? colon_pos : pos; + } return checkAndReturnHost(pos, dot_pos, start_of_host); } diff --git a/src/Functions/URL/tldLookup.generated.cpp b/src/Functions/URL/tldLookup.generated.cpp index d973d9efa07..c0ee9b35e7b 100644 --- a/src/Functions/URL/tldLookup.generated.cpp +++ b/src/Functions/URL/tldLookup.generated.cpp @@ -1,6 +1,6 @@ /* C++ code produced by gperf version 3.1 */ /* Command-line: /usr/bin/gperf --output-file=tldLookup.generated.cpp tldLookup.gperf */ -/* Computed positions: -k'1-11,13,$' */ +/* Computed positions: -k'1-11,13-14,17,$' */ #if !( \ (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) && (')' == 41) \ @@ -26,12 +26,12 @@ #pragma GCC diagnostic ignored "-Wunused-macros" #include -#define TOTAL_KEYWORDS 4942 +#define TOTAL_KEYWORDS 5045 #define MIN_WORD_LENGTH 4 -#define MAX_WORD_LENGTH 36 +#define MAX_WORD_LENGTH 34 #define MIN_HASH_VALUE 75 -#define MAX_HASH_VALUE 95690 -/* maximum key range = 95616, duplicates = 0 */ +#define MAX_HASH_VALUE 110600 +/* maximum key range = 110526, duplicates = 0 */ class TopLevelDomainLookupHash { @@ -44,27 +44,38 @@ public: inline unsigned int TopLevelDomainLookupHash::hash(const char * str, size_t len) { - static const unsigned int asso_values[] = { - 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, - 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, - 95691, 95691, 95691, 95691, 95691, 95691, 95691, 13532, 0, 5, 20274, 843, 5, 0, 25, 0, 0, 5, 0, - 0, 0, 95691, 95691, 95691, 0, 95691, 95691, 95691, 95691, 95691, 90, 55, 65, 20, 50, 115, 80, 110, - 105, 60, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, - 95691, 95691, 2240, 11727, 2640, 15, 225, 5155, 3815, 5700, 1020, 8055, 348, 4615, 20, 950, 5, 7600, 1644, - 610, 2055, 310, 0, 2075, 40, 14137, 100, 6785, 1005, 25879, 10983, 395, 17499, 24974, 20884, 320, 14423, 21274, - 5, 1238, 7540, 30729, 160, 16174, 30, 10, 45, 70, 120, 0, 0, 100, 0, 5, 0, 35, 0, - 70, 35, 140, 0, 0, 40, 0, 40, 0, 10, 0, 0, 0, 10, 5, 5, 10, 0, 5, - 55, 0, 0, 0, 0, 20, 0, 5, 0, 5, 5, 0, 5, 25, 15, 25, 0, 0, 0, - 40, 30, 0, 95691, 95691, 5, 0, 80, 0, 95691, 0, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 0, - 0, 0, 0, 95691, 95691, 95691, 5, 0, 0, 0, 0, 95691, 95691, 95691, 95691, 0, 0, 95691, 0, - 0, 0, 0, 10, 0, 0, 0, 5, 0, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 0, 95691, 95691, - 95691, 5, 0, 0, 0, 80, 10, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, - 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691, 95691}; + static const unsigned int asso_values[] + = {110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 
110601, 110601, 110601, 110601, 110601, 110601, + 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, + 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 3905, 0, 5, + 11617, 15312, 10, 5, 25, 0, 25, 0, 5, 0, 0, 110601, 110601, 110601, 5, 110601, + 110601, 110601, 110601, 110601, 30, 20, 5, 15, 10, 65, 45, 80, 70, 55, 110601, 110601, + 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, + 110601, 2570, 9477, 1350, 15, 130, 5915, 1830, 4360, 2210, 5405, 63, 3190, 20, 1165, 5, + 6120, 5863, 470, 2315, 175, 0, 815, 40, 13577, 115, 5680, 1030, 11798, 23179, 345, 1097, + 28079, 13839, 245, 25674, 31874, 75, 31774, 7351, 27474, 190, 16044, 8040, 50, 25, 35, 55, + 0, 0, 30, 0, 10, 0, 0, 0, 35, 0, 55, 10, 5, 65, 0, 60, + 0, 25, 5, 30, 0, 5, 10, 0, 20, 5, 5, 35, 5, 0, 0, 0, + 0, 0, 15, 0, 5, 5, 0, 5, 5, 5, 0, 0, 0, 0, 0, 15, + 5, 110601, 110601, 5, 10, 45, 5, 110601, 0, 110601, 110601, 110601, 110601, 110601, 110601, 110601, + 0, 0, 0, 0, 110601, 110601, 110601, 45, 0, 0, 0, 0, 110601, 110601, 110601, 110601, + 0, 0, 110601, 0, 0, 0, 0, 5, 0, 5, 30, 0, 0, 110601, 110601, 110601, + 110601, 110601, 110601, 110601, 0, 110601, 110601, 110601, 0, 0, 5, 0, 20, 40, 110601, 110601, + 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, + 110601, 110601, 110601, 110601}; unsigned int hval = len; switch (hval) { default: + hval += asso_values[static_cast(str[16])]; + /*FALLTHROUGH*/ + case 16: + case 15: + case 14: + hval += asso_values[static_cast(str[13] + 1)]; + /*FALLTHROUGH*/ + case 13: hval += asso_values[static_cast(str[12])]; /*FALLTHROUGH*/ case 12: @@ -185,7 +196,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "co.tm", "", - "", + "\340\270\227\340\270\253\340\270\262\340\270\243.\340\271\204\340\270\227\340\270\242", "", "", "", @@ -196,7 +207,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "com.so", "", - "", + "\340\270\243\340\270\261\340\270\220\340\270\232\340\270\262\340\270\245.\340\271\204\340\270\227\340\270\242", "", "co.cm", "com.mo", @@ -204,9 +215,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\327\231\327\250\327\225\327\251\327\234\327\231\327\235.museum", "", "", + "\340\270\230\340\270\270\340\270\243\340\270\201\340\270\264\340\270\210.\340\271\204\340\270\227\340\270\242", "", "", "com.bo", @@ -236,8 +247,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\340\270\243\340\270\261\340\270\220\340\270\232\340\270\262\340\270\245.\340\271\204\340\270\227\340\270\242", "", + "\327\231\327\251\327\225\327\221.\327\231\327\251\327\250\327\220\327\234", "", "com.bm", "", @@ -249,12 +260,27 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "\327\220\327\247\327\223\327\236\327\231\327\224.\327\231\327\251\327\250\327\220\327\234", + "", + "", + "", + "", + "", + "com.io", + "", + "edu.so", + "", + "", + "", + "", "edu.mo", "", "", "", "", - "", + "com.mw", "", "", "", @@ -269,25 +295,25 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.mw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "com.kw", - 
"\340\270\227\340\270\253\340\270\262\340\270\243.\340\271\204\340\270\227\340\270\242", "", "", + "com.im", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\327\236\327\236\327\251\327\234.\327\231\327\251\327\250\327\220\327\234", "", "edu.bm", "", @@ -299,6 +325,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "com.cu", + "com.mk", "", "", "", @@ -307,9 +335,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", + "com.co", "", "", "", @@ -331,7 +357,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\340\270\230\340\270\270\340\270\243\340\270\201\340\270\264\340\270\210.\340\271\204\340\270\227\340\270\242", + "", "", "", "edu.kw", @@ -339,6 +365,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "com.cm", + "", + "", + "", + "", + "com.ru", "", "", "", @@ -348,6 +380,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "com.ro", + "", + "co.dk", + "", + "", + "", + "", + "", + "", + "", + "edu.cu", + "edu.mk", "", "", "", @@ -356,15 +400,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.co", "", "", "", "", - "", - "", - "", - "", - "", + "com.cw", "", "", "", @@ -393,8 +434,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", + "\340\270\250\340\270\266\340\270\201\340\270\251\340\270\262.\340\271\204\340\270\227\340\270\242", + "edu.ru", "", "", "", @@ -409,17 +450,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.cu", "", "", "", "", "", + "com.se", "", "", "", "", - "com.co", + "nom.km", "", "", "", @@ -429,11 +470,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.cw", "", "", "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "c66.me", + "", + "", + "", + "", "edu.sy", "", "", @@ -449,7 +505,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.cm", + "", "", "", "", @@ -458,226 +514,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\320\270\320\272\320\276\320\274.museum", - "edu.cu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.co", - "", - "", - "", - "", - "edu.uy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.cw", - "", - "", - "", - "", - "com.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.km", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.cw", - "", - "", - "com.io", - "", - "edu.ru", - "", - "", - "", - "", - "www.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.su", - "", - "", - "", - "\340\270\250\340\270\266\340\270\201\340\270\251\340\270\262.\340\271\204\340\270\227\340\270\242", - "", - "", - "", - "", - "", - "", - "", - "", - "com.im", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.cy", - "", - "", - "", - "", - "c66.me", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -694,6 +530,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.uy", + "", + "", + "", + "", "", "", "", @@ -704,106 +545,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nom.uy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.dk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.by", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "com.st", "", "", @@ -814,7 +555,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", + "com.cy", "", "", "", @@ -844,6 +585,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nom.ro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -852,7 +609,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nom.im", "", "edu.st", "", @@ -874,76 +630,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "nym.ro", - "", - "", - "", - "", - "", - "com.mk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.ke", - "edu.mk", - "", - "", - "", - "", - "", - "", - "", - "", "com.re", "", "", @@ -979,6 +665,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "net.mu", "", "", @@ -994,7 +700,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.py", + "", "", "", "", @@ -1014,17 +720,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nym.me", "", "", "", "", - "crd.co", "", "", "", "", - "\347\266\262\347\265\241.\351\246\231\346\270\257", + "", + "", + "", "", "", "", @@ -1034,17 +740,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\345\205\254\345\217\270.\351\246\231\346\270\257", "", "", "", "", - "\346\224\277\345\272\234.\351\246\231\346\270\257", "", "", "", "", - "edu.py", + "", + "", + "", "", "", "", @@ -1067,7 +773,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", + "net.id", "", "", "", @@ -1077,6 +783,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) 
"net.kw", "", "", + "net.im", "", "", "", @@ -1084,16 +791,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nom.st", + "com.pk", "", "", "", + "crd.co", "", "", "", "", "", - "co.cr", "", "", "", @@ -1102,109 +809,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "*.mm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.mu", - "", - "", - "", - "", - "", - "nom.pw", - "", - "", - "", - "", - "", - "", - "", - "", - "ed.cr", - "", - "nom.mk", - "", - "", - "", - "", - "", - "", - "", - "", - "net.sy", - "", - "", - "", - "g\303\241ls\303\241.no", - "net.my", - "", - "", - "", - "", - "", - "", - "", - "", - "co.zm", - "", - "", - "", - "", - "", - "net.ky", - "", - "", - "", "", "net.cu", + "net.mk", + "", "", "", - "edu.it", "", - "com.gu", "", "", "", @@ -1214,11 +825,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.uy", "", "", "", - "co.hu", + "", + "co.cr", "", "", "", @@ -1235,7 +846,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", + "net.uk", "", "", "", @@ -1244,18 +855,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.pe", - "", - "", - "", - "co.mw", + "net.ru", + "edu.pk", + "", + "", + "", + "", + "", "", "", "", "", "", - "edu.gu", - "nym.sk", "", "", "", @@ -1284,7 +895,92 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.ru", + "com.py", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.sy", + "", + "", + "", + "ed.cr", + "net.my", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.pe", + "", + "", + "", + "", + "net.ky", + "", + "", + "", + "", + "net.rw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.me", + "", + "", + "", + "", + "net.uy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.py", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -1303,7 +999,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.zw", "", "", "", @@ -1320,271 +1015,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ly", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.id", - "", - "", - "", - "", - "nym.ie", - "", - "", - "", - "", - "net.im", - "", - "net.rw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.ly", - "", - "", - "", - "", - "adm.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.cy", - "", - "", - "", - "", - "com.gy", - "", - "", - "", - "", - "", - "", - "", - "", - "", "com.pt", "", "", "", "", - "com.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.gy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"edu.pt", - "", - "", - "", - "", - "edu.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.pk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.lu", - "", - "", - "", - "", - "nom.gd", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.pe", - "", - "", - "", - "", - "", - "", - "co.ni", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.pk", - "co.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.vi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "com.to", "", "", "", @@ -1604,698 +1040,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", + "net.cy", "", "", "", "", "net.bt", "", - "ed.ci", - "", - "", - "", - "", - "", - "", - "", - "nym.pe", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ge", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ato.br", - "", - "", - "", - "", - "", - "net.mk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.ge", - "", - "", - "", - "", - "", - "net.uk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.gy", - "", - "", - "", - "", - "net.py", - "", - "", - "", - "", - "nym.pt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.no", - "", - "", - "", - "", - "emr.it", - "", - "", - "", - "", - "", - "", - "com.gt", - "", - "", - "", - "", - "", - "com.lk", - "", - "nid.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.com", - "", - "", - "", - "", - "4u.com", - "", - "", - "", - "", - "com.om", - "", - "", - "", - "", - "", - "edu.gt", - "", - "", - "", - "", - "", - "edu.lk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "eu.com", - "", - "", - "", - "", - "edu.om", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.ge", - "", - "", - "", - "", - "", - "", - "", - "co.pw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "not.br", - "", - "", - "", - "", - "net.gu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ed.pw", - "", - "com.pr", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.pe", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "go.cr", - "", - "", - "", - "", - "", - "edu.pr", - "", - "", - "", - "", - "", - "", - "", - "", - "uy.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.sn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.bn", - "", - "", - "", - "", - "nom.gt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ly", - "", - "", - "", - "", - "nym.lt", - "", - "", - "", - "", - "edu.sn", - "", - "", - "", - "no.com", - "edu.mn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.bn", - "", - "", - "", - "", - "edu.kn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\245mli.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.gy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.pt", - "", - "", - "", - "", - "net.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.ls", - "", - "", - "", - "", - "", - "com.bi", - "", - "", - "", - "", - "com.ki", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.to", - "", - "", - "", - "", - "art.br", - "", - "", - "", - "", - "", - "net.pk", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.bi", - "", - "", - "", - "", - "edu.ki", - "", "", "", "", @@ -2309,7 +1060,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.cn", "", "", "", @@ -2317,25 +1067,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.pt", + "", + "", "", "", "edu.to", "", - "uk.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -2349,31 +1100,37 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "edu.tm", "", "", "", - "co.rw", - "com.lr", - "", - "", - "", - "", - "edu.cn", - "", - "", - "", - "", - "jor.br", - "", - "", - "", - "", - "", - "", - "", - "", "b\303\241l\303\241t.no", + "adm.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nom.pe", "", "", "", @@ -2389,22 +1146,30 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "emr.it", + "", + "com.br", + "", + "", + "", + "", "edu.tw", "", "", "", + "co.tt", "", "", "", "", - "nt.no", "", "", "", "", "", "", - "edu.lr", "", "", "", @@ -2423,95 +1188,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ar.com", - "com.gr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cim.br", - "", - "", - "", - "", - "", - "", - "", - "ne.pw", - "", - "net.ge", - "", - "", - "", - "", - "com.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.mn", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "edu.gr", - "", - "", - "", - "", - "", - "", - "", - "edu.in", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.ci", - "", - "", - "", - "", - "nom.si", - "", - "go.ci", - "", - "", - "", - "", - "", "", "", "", @@ -2539,17 +1215,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "net.ir", - "co.tt", + "edu.br", "", "", "", "", "", - "udi.br", + "", + "", + "", + "co.zm", + "", + "", + "", + "", + "g\303\241ls\303\241.no", "", "", "", @@ -2564,10 +1244,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "erotika.hu", - "", - "", - "", "", "", "", @@ -2589,19 +1265,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ato.br", "", "", "", "", "", - "ntr.br", "", "", "", "", "", + "net.pk", "", - "co.uk", "", "", "", @@ -2614,13 +1290,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.gt", "", "", "", "", - "\330\247\331\212\330\261\330\247\331\206.ir", - "net.lk", "", "", "", @@ -2634,7 +1307,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\330\247\333\214\330\261\330\247\331\206.ir", "", "", "", @@ -2654,198 +1326,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cnt.br", - "", - "", - "", - "net.om", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.tw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.pn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gok.pk", - "eti.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.gr", - "", - "", - "", - "cn.com", - "net.pr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.au", - "", - "", - "", - "", - "nov.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -2853,6 +1333,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "*.mm", "co.st", "", "", @@ -2864,7 +1345,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.am", "", "", "", @@ -2874,7 +1354,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.au", + "co.zw", + "", "", "", "", @@ -2889,25 +1370,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.bn", + "nov.ru", "", "", "", - "", - "net.kn", + "co.mu", "", "", "", "", "", - "", - "n\303\270tter\303\270y.no", - "", - "", - "com.aw", - "", - "", - "go.pw", + "net.py", "", "", "", @@ -2917,6 +1390,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "adult.ht", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cn.vu", + "net.pe", + "", + "", "", "", "", @@ -2938,8 +1434,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.bw", - "art.sn", "", "", "", @@ -2960,7 +1454,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jur.pro", "", "", "", @@ -2969,12 +1462,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gru.br", "", "", "", "", "", + "n\303\270tter\303\270y.no", "", "", "", @@ -2992,8 +1485,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "com.ye", + "com.lk", "", "", + "co.mw", "", "", "", @@ -3002,59 +1498,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "*.er", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cri.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ki", - "", - "", - "", - "", - "", - "", - "", - "", - "co.lc", - "", - "", - "", - "", - "", - "", - "", - "ru.com", - "", "", "", + "net.pt", "", "", "", @@ -3064,7 +1510,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", + "not.br", "", "", "", @@ -3099,16 +1545,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.cn", + "com.au", "", "", "", "", - "com.gn", + "edu.ye", + "edu.lk", "", "", "", - "av.tr", "", "", "", @@ -3131,6 +1577,206 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "net.tw", "", + "ac.lk", + "", + "", + "", + "", + "", + "", + "av.tr", + "com.am", + "", + "", + "", + "", + "com.ly", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.pr", + "", + "", + "", + "", + "edu.au", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.aw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.ly", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.lc", + "edu.pr", + "", + "", + "", + "co.bw", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ir", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "art.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -3141,10 +1787,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.lk", - "", - "", - "net.lr", "", "", "", @@ -3154,7 +1796,56 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.gn", + "", + "", + "", + "", + "", + "", + "", + "", + "0e.vc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "go.cr", + "", + "", + "", + "", + "gok.pk", + "com.sv", + "", + "", + "", + "", + "com.mv", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.tt", "", "", "", @@ -3194,7 +1885,37 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "adult.ht", + "", + "", + "", + "", + "", + "edu.sv", + "", + "", + "", + "", + "edu.mv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jor.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "adv.br", "", "", "", @@ -3207,14 +1928,69 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "g12.br", + "", + "", + "", + "", + "", + "", + "", + "bmd.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.tr", + "", + "", + "", + "", + "com.cv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ye", + "net.lk", + "", + "", + "", + "ntr.br", + "", + "", + "", + "", + "", + "", + "", + "", "", "gov.mu", "", "", "", "", - "net.gr", + "gov.so", "", "", "", @@ -3222,7 +1998,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "gov.mo", "", "", - "net.in", + "", "", "", "", @@ -3239,13 +2015,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.ci", + "edu.tr", "", "", "", "", - "com.gi", - "", + "edu.cv", "", "", "", @@ -3255,6 +2030,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.au", "", "", "", @@ -3294,12 +2070,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.gi", - "", + "net.am", "", "", "", "", + "net.ly", "", "", "", @@ -3314,6 +2090,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.pr", "", "", "", @@ -3333,104 +2110,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gon.pk", - "", - "cam.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.sy", - "", - "", - "", - "", - "gov.my", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.by", - "", - "", - "", - "", - "gov.ky", - "", - "", - "", - "gr.com", "gov.cu", - "", + "gov.mk", "", "", "", @@ -3444,241 +2125,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "adv.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.li", - "", - "", - "", - "", - "gov.cd", - "", - "", - "", - "", - "bmd.br", - "", - "", - "", - "", - "gov.cm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.tr", - "", - "", - "", - "", - "net.pn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.tr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.li", - "", - "", - "", - "", - "nom.ae", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.rw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "arq.br", - "", - "", - "", - "", - "", - "net.au", - "", - "", - "rio.br", - "", - "", - "", - "", - "", - "biev\303\241t.no", - "", - "", - "", - "", - "", - "gov.cy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.am", - "", - "", - "", - "", - "gov.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.tt", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -3694,247 +2140,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "am.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.mu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jus.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.iq", - "", - "", - "", - "gov.st", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.bt", - "", - "", - "bu.no", - "ac.mw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.iq", - "", - "", - "ac.cy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gets-it.net", - "", - "", - "", - "", - "", - "bod\303\270.no", - "", - "", - "", - "net.gn", - "", - "", - "", - "ac.zw", - "", - "", - "", - "", - "", - "", - "gov.mk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "gov.cd", "", "", "", @@ -3944,287 +2150,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.cm", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "grosseto.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nc.tr", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.ie", - "", - "", - "", - "", - "aa.no", - "", - "", - "", - "", - "", - "", - "gov.py", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "us.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ar", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ae", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.ar", - "", - "", - "", - "", - "eco.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "za.com", - "", - "", - "ac.ni", - "", - "", - "", - "", - "", - "", - "", - "bir.ru", - "", - "ac.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.tn", - "", - "", - "", - "", - "", - "", - "co.bi", - "", - "", - "", - "", - "", - "gov.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.gu", + "gov.ru", "", "", "", @@ -4261,6 +2192,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.je", "", "", "", @@ -4268,8 +2200,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.rs", - "net.tr", "", "", "", @@ -4280,45 +2210,47 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.sy", "", "", "", "", + "gov.my", "", "", "", "", - "co.us", - "ass.km", "", "", "", "", "", + "gov.by", "", "", "", "", - "com.ms", "", "", "", "", "", + "gov.rw", "", "", "", "", - "com.bs", "", "", "", "", "", + "gov.me", "", "", "", "", + "geo.br", "", "", "", @@ -4334,12 +2266,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.sv", "", "", "", "", - "com.mv", "", "", "", @@ -4349,116 +2279,43 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.ms", "", "", "", "", - "gov.ly", "", "", - "", - "", - "edu.bs", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "wy.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.sv", - "", - "", - "", - "", - "edu.mv", - "", - "", - "", - "", - "com.ws", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "etc.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "est.pr", - "grong.no", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.gy", - "", - "", - "", - "", - "gov.mr", - "", - "", - "", - "", - "gov.pt", - "", - "", - "", - "", - "gov.br", - "", - "", - "", - "", - "edu.ws", - "", - "", - "nt.ro", - "", "com.ee", "", "", "", "", - "bio.br", + "net.ae", "", "", - "rec.co", - "qc.com", + "", + "", + "", + "", + "", + "", + "", + "com.lr", + "", + "", + "gov.ie", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.mv", "", "", "", @@ -4483,22 +2340,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.cy", "", "", "", "", - "", - "", - "", - "gos.pk", + "gov.bt", "", "", "", "", - "", - "", - "", - "br.com", "edu.ee", "", "", @@ -4510,113 +2361,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.pk", + "jur.pro", "", "", + "nc.tr", + "edu.lr", "", "", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rec.ro", - "", - "", - "", - "co.je", - "gen.in", - "", - "", - "", - "", - "", - "", - "", - "\347\275\221\347\273\234.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\347\266\262\347\265\241.cn", - "", - "", - "nd.us", - "", - "", - "", - "", - "nm.us", 
- "", - "\345\205\254\345\217\270.cn", - "", - "", - "", - "", - "", - "ak.us", - "", - "ct.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.iq", - "", - "even\303\241\305\241\305\241i.no", - "", - "", - "", - "", + "ac.uk", "", "", "com.et", @@ -4637,12 +2393,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "com.is", + "gov.it", "", "", "", @@ -4654,7 +2405,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.rs", + "com.gu", "", "", "", @@ -4663,8 +2414,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ny.us", - "com.sa", + "", + "", + "", + "", + "", + "co.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.scot", + "", + "", + "net.tr", "", "", "", @@ -4674,12 +2445,47 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.ge", + "", + "", + "co.ni", "", "", "", "", - "com.ba", + "", + "", + "", + "", + "", + "co.ci", + "", + "", + "", + "", + "", + "", + "", + "cnt.br", + "", + "", + "", + "", + "edu.gu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.vi", + "", + "", + "com.ar", "", "", "", @@ -4692,9 +2498,104 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.is", "", "", + "edu.gd", + "", + "", + "", + "co.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.sn", + "", + "", + "", + "ac.zm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.bn", + "", + "ed.ci", + "", + "", + "", + "", + "", + "", + "", + "edu.ar", + "", + "", + "", + "", + "eco.br", + "", + "", + "", + "", + "", + "", + "", + "uk.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.pk", + "", + "", + "", + "", + "", + "", + "", + "", + "gru.br", + "", + "", + "com.in", + "", + "edu.sn", + "", + "", + "", + "", + "edu.mn", "edu.krd", "", "", @@ -4704,17 +2605,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.ua", + "edu.bn", "", "", "", "", + "edu.kn", "", "", "", "", - "ut.us", - "edu.sa", "", "", "", @@ -4729,15 +2629,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.ba", "", "", "", "", "", + "ac.cy", + "com.gy", "", "", "", + "co.rw", "", "", "", @@ -4751,16 +2653,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.in", "", + "com.cn", "", "", - "edu.ua", "", + "ac.zw", "", "", "", "", "", + "com.ge", "", "", "", @@ -4768,13 +2673,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nid.io", "", "", "", "", - "gov.ir", "", - "gov.lt", + "ac.mu", + "gov.py", "", "", "", @@ -4788,13 +2694,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ne.us", "", "", "", "", "", "", + "edu.gy", "", "", "", @@ -4809,32 +2715,32 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.jo", "", "", "", "", "", + "edu.cn", "", "", "", "", + "etc.br", "", "", "", "", + "edu.ge", "", "", "", "", "", - "ac.ls", "", "", "", "", "", - "net.ar", "", "", "", @@ -4848,23 +2754,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t 
len) "", "", "", - "ac.ru", "", - "gov.lk", + "com.gt", "", "", - "ar.us", "", "", - "erotica.hu", "", "", - "edu.jo", "", "", "", "", - "com.ai", "", "", "", @@ -4884,407 +2785,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nom.rs", + "net.lr", "", "", "", - "gov.om", "", "", "", "", "", + "ac.mw", "", "", "", "", "", - "", - "", - "", - "", - "", - "com.ps", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.rw", - "net.tn", - "", - "", - "", - "", - "edu.ps", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "uwu.ai", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "grimstad.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.pr", - "", - "", - "", - "", - "", - "", - "", - "", - "co.na", - "ecn.br", - "", - "", - "", - "", - "", - "", - "", - "", - "co.ca", - "", - "", - "", - "", - "", - "", - "", - "", - "uk.net", - "we.bs", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nyc.mn", - "", - "", - "", - "", - "net.ms", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.bs", - "", - "", - "", - "", - "anani.br", - "", - "", - "", - "", - "gov.mn", - "", - "", - "", - "", - "co.krd", - "", - "", - "", - "", - "gov.bn", - "", - "", - "", - "", - "gov.kn", - "", - "", - "", - "", - "net.mv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.ai", - "", - "", - "", - "co.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nsw.au", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ws", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.uk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gu.us", - "", - "", - "", - "", - "", - "", - "", - "railroad.museum", - "", - "", - "", - "", - "", - "", - "", - "gov.ki", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.pa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.lv", + "gov.pt", "", "", "", @@ -5293,8 +2809,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "wi.us", - "edu.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.gt", "", "", "", @@ -5309,32 +2835,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.pr", - "ciencia.bo", "gov.tm", "", "", "", "", - "edu.pa", - "", - "", - "", - "nu.ca", - "gov.cn", - "", - "", - "", - "", - "edu.lv", "", "", "", @@ -5349,7 +2854,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", 
"", - "com.do", + "", + "", + "", + "", + "", + "", + "net.et", + "", + "", + "", + "", + "", "", "", "", @@ -5374,111 +2890,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.lr", + "net.gu", "", "", "", "", - "com.dm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.do", - "", - "yk.ca", - "", - "", - "", - "", - "", - "", - "", - "net.et", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "r\303\270st.no", - "", - "", - "", - "", - "", - "edu.dm", - "", - "", - "net.is", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.gr", - "", - "", - "", - "", - "", - "", - "", - "gov.in", - "", - "net.sa", - "", - "", - "rec.br", - "", - "net.ma", - "", - "", - "", - "art.museum", "com.sc", "", "", "", "", - "net.ba", - "", - "", - "", - "", - "", - "", - "", - "", - "corvette.museum", "", "", "", @@ -5494,7 +2915,34 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.mr", + "gon.pk", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.br", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -5508,12 +2956,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "and.museum", "", "", "", "", - "nrw.museum", "edu.sc", "", "", @@ -5524,6 +2970,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.ar", "", "", "", @@ -5534,167 +2981,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "bomlo.no", "", "", + "gov.ir", "", "", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.pa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "*.ye", - "", - "gen.tr", - "", - "", - "", - "", - "net.jo", - "git-repos.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.la", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nt.ca", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ai", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.la", - "", - "", - "", - "", - "", - "", - "", - "", - "brussels.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.kr", - "net.ps", + "com.lv", "", "", "", @@ -5711,10 +3007,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "act.au", "", - "j\303\270rpeland.no", "", "", - "gov.pn", "", "", "", @@ -5728,17 +3022,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.nl", "", "", "", + "net.bn", "", "", "", "", + "net.kn", "", "", - "co.cl", "", "", "", @@ -5747,9 +3041,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "red.sv", "", "", + "*.er", "", "", "", @@ -5761,772 +3055,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.lv", "", "", "", - "asn.au", - "", - "", - "", - "", - "", - "", - "", - "", - "georgia.su", 
- "", - "", - "", - "", - "", - "agr.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.de", - "", - "", - "ru.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.au", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "res.in", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.tt", - "", - "", - "", - "", - "", - "ngo.lk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.mg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ras.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "romsa.no", - "", - "", - "", - "", - "co.ma", - "nym.la", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ls", - "", - "", - "", - "", - "net.je", - "", - "", - "", - "co.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.pa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.lv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aremark.no", - "", - "", - "", - "", - "", - "gov.gn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.do", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cng.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.dm", - "", - "", - "", - "air.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ne.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "art.do", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "eng.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.sc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.gi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rnu.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.ae", - "", - "", - "", - "as.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nic.in", - "wv.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "brussel.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "al.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.la", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.lc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aaa.pro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.lc", - "", - "", - "", - "", - "", - "edunet.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.tr", - "", - "ri.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "wa.us", - "res.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "ca.us", - "", - "", "", + "edu.pn", "", "", "", @@ -6536,8 +3070,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "erotika.hu", "", "", + "net.in", "", "", "", @@ -6569,6 +3105,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "art.sn", "", "", "", @@ -6583,8 +3120,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.gy", "", - "eidsberg.no", "", "", "", @@ -6603,14 +3140,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.cn", "", "", "", "", + "gov.tt", "", "", "", "", + "net.ge", "", "", "", @@ -6623,7 +3163,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ua.rs", "", "", "", @@ -6634,11 +3173,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cam.it", "", "", "", "", - "nv.us", "", "", "", @@ -6646,7 +3185,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "american.museum", "", "", "", @@ -6658,7 +3196,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "go.kr", "", "", "", @@ -6702,8 +3239,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nl.no", "", + "net.gt", "", "", "", @@ -6719,7 +3256,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.qa", "", "", "", @@ -6728,13 +3264,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "group.aero", "", "", "", "", "", "", + "gov.ye", + "gov.lk", "", "", "", @@ -6749,7 +3286,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "getmyip.com", "", "", "", @@ -6758,10 +3294,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.me", "", "", "", "", + "co.us", "", "", "", @@ -6769,7 +3307,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.qa", "", "", "", @@ -6780,6 +3317,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ak.us", "", "", "", @@ -6789,10 
+3327,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nym.lc", "", "", "", + "gov.au", "", "", "", @@ -6802,10 +3340,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "com.tn", "", "", "", "", + "com.gr", "", "", "", @@ -6813,14 +3353,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aukra.no", "", "", + "cim.br", "", "", "", "", - "eng.pro", "", "", "", @@ -6835,27 +3374,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aca.pro", - "greta.fr", "", + "gov.ly", "", "", - "gov.iq", "", "", + "net.sc", "", "", "", "", + "agr.br", "", "", "", "", + "gov.pr", "", "", "", - "notteroy.no", "", + "com.jo", "", "", "", @@ -6863,8 +3403,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.rs", - "za.net", "", "", "", @@ -6872,6 +3410,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.gr", "", "", "", @@ -6880,7 +3419,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "bar.pro", "", "", "", @@ -6901,7 +3439,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "educator.aero", "", "", "", @@ -6910,7 +3447,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "go.ci", "", + "biev\303\241t.no", "", "", "", @@ -6919,9 +3458,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jdf.br", "", "", + "edu.jo", "", "", "", @@ -6930,15 +3469,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ct.us", "", "", "", "", "", + "net.lv", "", "", "", "", + "net.pn", "", "", "", @@ -6947,34 +3489,37 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gv.vc", "", "", "", "", "", + "gov.lt", "", "", "", "", + "udi.br", + "ngo.lk", "", "", "", "", - "bardu.no", "", "", - "co.gy", - "nat.tn", "", "", "", "", "", "", + "nd.us", "", "", "", "", + "nm.us", "", "", "", @@ -6995,11 +3540,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "bet.ar", "", "", "", "", - "nom.qa", "", "", "", @@ -7015,12 +3560,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nesna.no", "", "", "", "", "", + "co.krd", "", "", "", @@ -7040,51 +3585,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "eti.br", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "r\303\245de.no", - "gov.ar", - "", - "", - "", - "co.ug", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ws.na", - "", + "gov.ae", "", "", "", @@ -7104,276 +3610,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "ac.se", - "com.fr", - "", - "", - "", - "", - "", - "", - "re.kr", - "", - "co.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bukhara.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nodum.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "gov.tn", - "*.kh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ens.tn", - "", - "", - "", - "nc.us", - "net.lc", - "", - "", - "", - "", - "", - "", - "", - "", - "us.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ca.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.ms", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.bs", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", "", + "ny.us", "gov.mv", "", "", "", + "ut.us", "", "", - "\303\270yer.no", "", - "co.pl", "", "", "", "", "", "", + "ne.us", "", "", "", @@ -7383,8 +3639,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ns.ca", - "nom.fr", "", "", "", @@ -7394,7 +3648,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "qld.au", "", "", "", @@ -7405,7 +3658,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nord-fron.no", "", "", "", @@ -7414,7 +3666,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.ws", "", "", "", @@ -7423,7 +3674,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "war.museum", "", "", "", @@ -7448,7 +3698,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "zt.ua", "", "", "", @@ -7456,8 +3705,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ck.ua", - "bas.it", "", "", "", @@ -7467,9 +3714,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ah.no", "", - "gov.ee", + "ecn.br", "", "", "", @@ -7478,6 +3724,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ar.us", "", "", "", @@ -7488,7 +3735,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ga.us", + "gov.tr", "", "", "", @@ -7498,25 +3745,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "jus.br", "", "", "", "", "", "", + "ac.ni", "", "", "", "", "", - "net.qa", "", "", "", "", + "ac.ci", "", "", - "caa.aero", "", "", "", @@ -7544,23 +3792,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.bi", "", "", "", - "a.se", - "jeonbuk.kr", "", "", "", "", + "bir.ru", "", "", "", "", - "x.se", + "cri.br", "", "", "", + "ac.ls", "", "", "", @@ -7568,7 +3817,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ne.ug", "", "", "", @@ -7576,25 +3824,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "giessen.museum", - "w.se", "", + "net.tn", "", "", "", + "ac.ru", + 
"net.gr", "", "", "", "", "", - "c.se", "", "", "", "", - "y.se", "", - "nsn.us", "", "", "", @@ -7603,211 +3849,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nyc.museum", - "county.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "z.se", - "", - "", - "rl.no", - "", - "", - "", - "gov.rs", - "", - "", - "", - "", - "caa.li", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.et", - "", - "", - "", - "", - "", - "", - "", - "e.se", - "", - "", - "", - "", - "", - "qc.ca", - "", - "", - "", - "", - "", - "url.tw", - "", - "", - "gov.is", - "", - "", - "", - "", - "", - "", - "", - "newport.museum", - "", - "", - "", - "", - "b\303\245d\303\245ddj\303\245.no", - "", - "", - "cc.na", - "", - "", - "", - "", - "works.aero", - "gov.sa", - "", - "", - "", - "", - "gov.ma", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.ba", - "", - "nodum.co", - "", - "avellino.it", - "", - "", - "", - "u.se", - "", - "", - "", - "", - "", - "", - "", - "*.ck", - "", - "", - "cr.ua", - "", - "", - "", - "", - "", - "gov.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "naumburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jolster.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\340\270\255\340\270\207\340\270\204\340\271\214\340\270\201\340\270\243.\340\271\204\340\270\227\340\270\242", - "", - "can.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.om", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "es.kr", - "", - "", - "", - "", "", "", "", @@ -7829,7 +3870,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.jo", + "", "", "", "", @@ -7838,8 +3879,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "s\303\241l\303\241t.no", "", + "net.jo", "", "", "", @@ -7864,10 +3905,25 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "edu.sg", "", "", - "n.se", + "", "", "edu.mg", "", @@ -7888,6 +3944,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ac.rw", "", "", "", @@ -7924,7 +3981,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.ps", "", "", "", @@ -7948,6 +4004,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.kr", "", "", "", @@ -7974,7 +4031,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "anthro.museum", "", "", "", @@ -7982,100 +4038,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cal.it", "", "", "", - "enebakk.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "qsl.br", - "", - "", - "", - "go.ug", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cn.ua", - "", - "", - "", - "", - "", - "nom.nu", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "r\303\270mskog.no", "", "", "", @@ -8095,6 +4060,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "com.lc", "", "", "", @@ -8104,12 +4070,186 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.ee", + "", + "", + "", + "", + "", + "gos.pk", + "", + "", + "", + "nsw.au", + "", + "", + "", + "", + "gov.lr", + "", + "", + "", + "", + "", + "", + "", + "", + "co.na", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.ca", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bio.br", + "", + "", + "", + "", + "cng.br", + "", + "", + "", + "", + "edu.lc", + "", + "", + "", + "", + "net.je", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "est.pr", + "", + "yk.ca", + "", + "", + "", + "", + "gov.scot", + "", + "", + "", + "", + "", + "", + "", + "gov.et", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nyc.mn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.gu", + "", + "", + "", + "", + "eng.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gen.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.gd", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "com.ac", "", "", "", "", - "com.es", "", "", "", @@ -8126,15 +4266,25 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aircraft.aero", "", - "bykle.no", + "", + "", + "", + "gov.ar", "", "", "", "", "", - "nom.ug", + "", + "", + "", + "", + "", + "", + "", + "", + "gu.us", "", "", "", @@ -8149,7 +4299,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "j\303\270lster.no", + "", + "", + "", + "", + "", + "nu.ca", + "", "", "", "", @@ -8158,8 +4314,663 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nv.us", + "gov.mn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.bn", + "", + "", + "", + "", + "gov.kn", + "", + "", + "", + "ne.kr", + "", + "", + "", + "", + "", + "net.sg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "erotica.hu", + "", + "", + "net.kg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.in", + "", + "", + "", + "", + "", + "at.vg", + "gets-it.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.gy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.ge", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ky.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nt.ca", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.nl", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "co.cl", + "", + "", + "", + "", + "", + "net.lc", + "", + "", + "", + "", + "grosseto.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.fm", + "", + "", + "", + "ac.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "notteroy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.bi", + "", + "", + "", + "", + "com.ki", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "eng.pro", + "", + "", + "", + "edu.fm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.sc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.bi", + "", + "", + "", + "", + "edu.ki", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ec", + "", + "", + "", + "", + "net.ac", + "*.ck", + "", + "", + "", + "com.gn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ci", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "qld.au", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.lv", + "", + "", + "", + "", + "gov.pn", + "", + "", + "", + "ac.be", + "", + "", + "", + "", + "", + "edu.ec", + "", + "", + "", + "", + "", + "", + "", + "", + "co.rs", + "edu.gn", + "", + "", + "", + "jolster.no", + "", + "", + "", + "", + "", + "edu.ci", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ass.km", + "", + "", + "", + "", + "com.ss", + "", + "", + "", + "", + "com.ms", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.bs", + "", + "", + "", + "", + "", + "", + "", + "gen.in", + "nc.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "url.tw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.is", + "", + "edu.ss", + "", + "", + "", + "aremark.no", + "edu.ms", + "", + "", + "", + "", + "", + "", + "", + "", + "co.ua", + "edu.bs", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "za.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ws", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.is", + "", + "", + "", + "", + "", + "", + "asn.au", + "", + "ck.ua", + "", + "", + "", + "", + "", + "", + "", "", - "edu.es", "", "", "", @@ -8182,7 +4993,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "rieti.it", + "", + "go.kr", + "", + "", + "", + "", + "", + "anani.br", + "", + "", + "", + "", + "edu.ws", "", "", "", @@ -8191,13 +5014,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fuossko.no", "", "", "", "", "", "", + "", + "", + "", + "", + "co.ug", + "", + "", + "", + "", "\303\270rland.no", "", "", @@ -8222,9 +5053,539 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "withgoogle.com", "", - 
"asn.lv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.rs", + "", + "", + "", + "zt.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.fm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.za", + "net.ki", + "", + "", + "", + "", + "gov.tn", + "", + "", + "", + "", + "gov.gr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.jo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.ro", + "", + "", + "", + "", + "", + "", + "", + "com.ag", + "", + "", + "", + "", + "net.ec", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.gn", + "", + "", + "", + "", + "", + "", + "bukhara.su", + "", + "", + "net.ci", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.ma", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bloxcms.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ps", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ac.kr", + "net.ss", + "", + "", + "", + "", + "net.ms", + "", + "", + "", + "co.ke", + "", + "", + "", + "", + "", + "net.bs", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.sa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ba", + "", + "", + "", + "ne.ug", + "", + "", + "", + "", + "", + "edu.ps", + "", + "", + "", + "", + "", + "", + "", + "", + "cr.ua", + "", + "", + "", + "net.is", + "co.mg", + "com.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "group.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.sa", + "bar.pro", + "", + "", + "", + "", + "", + "", + "", + "", + "nom.ag", + "", + "", + "", + "", + "edu.ba", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ws", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cc.na", + "edu.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "educator.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "aca.pro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "enebakk.no", + "", + "qc.ca", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kvits\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "as.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -8263,8 +5624,109 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "windmill.museum", - "gov.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.sg", + "", + "", + "", + "", + "gov.mg", + "", + "", + "", + "ne.ke", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.kg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.gy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.eg", + "", + "", + "", + "", + "net.ag", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nom.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cv.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.hk", "", "", "", @@ -8288,7 +5750,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.kr", + "edu.eg", "", "", "", @@ -8304,7 +5766,111 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.lv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ps", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.hk", + "", + "", + "", + "", + "", + "", + "", + "", + "eidsberg.no", + "", + "", + "", + "", + "com.pa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.sa", + "", + "", + "", + "", + "net.ma", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ba", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.lc", + "", + "nl.ci", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ua", + "", + "", + "", + "ca.us", + "", + "", + "", + "", + "", + "edu.pa", "", "", "", @@ -8320,24 +5886,704 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jeonbuk.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ht", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ai", + "", + "", + "", + "", + "", + "", + "jdevcloud.com", + "", + "", + "", + "km.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ch.tc", + "", + "", + "j\303\270rpeland.no", + "", + "", + "nom.pa", + "", + "", + "", + "go.ug", + "edu.ht", + "", + "", + "", + "", + "bel.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.ac", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "4u.com", + "", + "edu.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "cn.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.om", + "", + "", + "", + "", + "ktistory.com", + "ac.rs", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.do", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "uwu.ai", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "zlg.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.dm", + "", + "", + "\303\245mli.no", + "", + "", + "", + "", + "eu.com", + "", + "", + "", + "", + "edu.om", + "", + "", + "", + "", + "", + "", + "net.eg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.do", + "", + "co.pw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.hk", + "", + "", + "", + "edu.dm", + "", + "", + "", + "co.ir", + "", + "", + "", + "", + "bc.ca", + "", + "", + "", + "", + "ciencia.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bas.it", + "", + "", + "", + "", + "", + "cc.ua", + "", + "", + "", + "", + "123sait.ru", + "", + "", + "greta.fr", + "", + "", + "", + "", + "", + "", + "", + "nat.tn", + "", + "", + "", + "\303\270rskog.no", + "", + "", + "ed.pw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.pa", + "", + "", + "no.com", + "", + "", + "uk.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "go.ke", + "", + "", + "", + "", + "co.om", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nic.in", + "ac.ug", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cal.it", + "", + "", + "", + "", + "uy.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.de", + "", + "", + "", + "", + "", + "audnedaln.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "es.kr", + "grimstad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asn.lv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nt.no", + "", + "", + "gov.ki", + "", + "", + "", + "ac.za", + "", + "kr.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ht", + "", + "", + "", + "gc.ca", + "", + "", + "", + "", + "al.us", + "", + "", + "", + "", + "", + "net.ai", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aju.br", + "git-repos.de", + "", + "", + "", + "", + "", + "", + "", + "", + "com.la", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.ec", + "", + "", + "ar.com", + "", + "", + "", + "ne.pw", + "", + "", + "gov.gn", + "", + "", + "", + "ao.it", + "", + "", + "", + "", + "", + "art.ht", + "", + "", + "", + "", + "", + "", + "fuossko.no", + "", + "", + "com.hr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.gg", + "", + "", + "", + "us.na", + "", + "", + "", + "", + "ac.ma", + "", + "", + "", + "", + "co.it", + "edu.la", + "", + "", + "", + "", + "", + "", + "", + "", + "be.gy", + "com.sl", + "", + "", + "", + "", + "com.ml", "nord-odal.no", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rns.tn", + "net.om", "", "", "", @@ -8359,6 +6605,77 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.do", + "", + "", + "", + "", + "gov.ss", + "", + "", + "", + "", + "gov.ms", + "", + "", + "", + "ns.ca", + "", + "", + "", + "", + "ac.ke", + "gov.bs", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.dm", 
+ "", + "", + "", + "", + "edu.sl", + "", + "", + "", + "", + "edu.ml", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.es", + "", + "", + "", + "ca.na", + "", + "", + "", + "", + "", + "qsl.br", + "ks.us", + "", + "", + "", + "", + "", + "", + "gov.is", + "alesund.no", + "\303\241k\305\213oluokta.no", "", "", "", @@ -8369,13 +6686,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.do", + "aaa.pro", "", "", - "cincinnati.museum", - "arendal.no", + "ce.it", + "j\303\270lster.no", "", - "bryne.no", + "", + "", + "at.it", + "", + "", + "", + "", + "", + "art.do", "", "", "", @@ -8389,6 +6714,152 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "*.jm", + "", + "edu.es", + "", + "", + "", + "ud.it", + "gov.ws", + "", + "", + "", + "", + "", + "", + "", + "", + "ct.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "adygeya.su", + "namdinh.vn", + "", + "", + "", + "", + "", + "*.kh", + "", + "", + "nu.it", + "", + "", + "", + "", + "no.it", + "", + "kv.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.rs", + "", + "", + "", + "", + "", + "", + "ad.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "caa.aero", + "", + "", + "", + "", + "", + "", + "", + "coz.br", + "", + "bu.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.jp", + "", + "", + "com.gi", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bod\303\270.no", + "", + "", + "", + "", + "", + "", + "", + "", + "ga.us", + "", + "", + "", + "", + "", "nom.es", "", "", @@ -8399,7 +6870,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.dm", + "ens.tn", "", "", "", @@ -8415,9 +6886,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kg.kr", "", "", "", + "edu.gi", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "eun.eg", + "", + "", + "", + "ac.im", + "", + "", + "ed.jp", + "", "", "", "", @@ -8484,7 +6975,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.sc", "", "", "", @@ -8492,15 +6982,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "georgia.museum", "", "", "", + "net.la", "", "", "", + "ar.it", "", - "jampa.br", "", "", "", @@ -8536,11 +7026,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "barsy.net", "", "", - "from-or.com", "", + "cr.it", "", "", "", @@ -8552,11 +7041,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "g.se", "", "", "", "", + "com.pl", "", "", "", @@ -8569,32 +7058,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "from-ma.com", "", "", - "fm.no", + "net.sl", "", - "from-ut.com", "", "", "", + "net.ml", "", "", "", "", "", - "ac.ma", "", "", "", "", "", - "from-wa.com", "", "", "", "", - "from-mt.com", + "gov.ps", "", "", "", @@ -8604,7 +7090,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.sg", "", "", "", @@ -8624,8 +7109,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.kg", "", + "edu.pl", "", "", "", @@ -8634,15 +7119,15 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.la", "", + "gov.sa", "", "", - "ac.za", + "net.il", "", + "gov.ma", "", "", - "anthropology.museum", "", "", "", @@ -8650,10 +7135,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.ba", "", "", "", - "gc.ca", "", "", "", @@ -8665,35 +7150,35 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "carrier.museum", "", "", + "ne.jp", "", "", "", "", "", - "bus.museum", - "from-wv.com", "", "", + "gov.ua", "", "", "", "", + "forum.hu", "", "", "", "", "", - "ad.jp", "", "", "", "", + "atm.pl", "", + "go.pw", "", - "norddal.no", "", "", "", @@ -8705,7 +7190,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "r\303\245holt.no", "", "", "", @@ -8715,8 +7199,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "and.mom", - "co.jp", "", "", "", @@ -8732,202 +7214,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "austin.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "computer.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barum.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "r.se", - "ed.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\241k\305\213oluokta.no", - "", - "", - "", - "", - "", - "", - "", - "", - "co.ke", - "", - "", - "", - "", - "", - "", - "biella.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rauma.no", - "", - "askvoll.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "narviika.no", - "fi.cr", - "", - "", - "", - "", - "al.us", - "com.ht", - "", - "entertainment.aero", - "", - "", - "eid.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "games.hu", - "", - "", - "from-oh.com", - "", - "", - "", - "", - "", - "", - "net.ac", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "consultant.aero", - "edu.ht", - "", - "", - "", - "", - "eastcoast.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "nhs.uk", @@ -8935,16 +7221,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.hk", - "", - "", - "egersund.no", - "", - "", - "", - "", - "yosemite.museum", - "jerusalem.museum", "", "", "", @@ -8959,6 +7235,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "berlevag.no", "", "", "", @@ -8971,35 +7248,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cn.com", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.hk", - "", - "convent.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "usa.museum", - "", + "nom.pl", "", "", "", @@ -9018,7 +7269,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nm.cn", + "bygland.no", + "", + "", + "", + "", + "", + "", + "", + "barsy.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -9033,12 +7301,10 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "rissa.no", "", "", "", "", - "niepce.museum", "", "", "", @@ -9062,18 +7328,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "br.com", "", "", "", "", "", + "av.it", "", "", + "aosta.it", "", - "", - "", - "", - "", + "bo.it", "emp.br", "", "", @@ -9093,18 +7359,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aarborte.no", "", + "jdf.br", "", "", "", "", "", "", - "skedsmo.no", "", "", - "fnd.br", "", "", "", @@ -9130,14 +7394,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "at.md", "", "", "", - "co.ir", "", "", "", - "*.jm", "", "", "", @@ -9151,20 +7414,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jdevcloud.com", "", "", "", "", + "arq.br", "", "", "", - "circus.museum", "", "", "", - "\303\270rskog.no", - "from-mo.com", "", "", "", @@ -9183,8 +7443,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cv.ua", - "bnr.la", "", "", "", @@ -9199,6 +7457,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "k\303\245fjord.no", "", "", "", @@ -9209,16 +7468,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "qc.com", "", + "gov.eg", "", "", "", "", "", "", - "ne.jp", "", - "raisa.no", "", "", "", @@ -9235,8 +7494,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nl.ca", "", "", + "\303\270ksnes.no", "", "", "", @@ -9247,24 +7508,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "bristol.museum", "", + "bt.it", "", + "gov.hk", "", "", + "ua.rs", "", "", "", "", "", + "from-or.com", "", "", "", + "ac.ir", "", "", "", "", "", + "net.pl", "", "", "", @@ -9277,24 +7543,25 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.pa", + "gr.com", "", - "com.ec", "", "", "", "", "", + "from-ma.com", "", "", "", + "go.it", + "from-ut.com", "", "", "", "", "", "", - "bloxcms.com", "", "", "", @@ -9303,21 +7570,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "from-wa.com", "", "", "", "", + "from-mt.com", "", "", "", "", "", "", - "ne.ke", "", + "from-oh.com", "", "", - "b\303\241jddar.no", "", "", "", @@ -9329,19 +7597,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "edu.ec", "", "", "", "", "", "", + "\340\270\255\340\270\207\340\270\204\340\271\214\340\270\201\340\270\243.\340\271\204\340\270\227\340\270\242", "", "", - "ulm.museum", "", + "uk.kg", "", "", + "kr.com", "", "", "", @@ -9356,6 +7625,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "art.pl", "", "", "", @@ -9375,22 +7645,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nuoro.it", "", "", "", "", "", + "com.qa", + "neat-url.com", "", "", "", + "s\303\241l\303\241t.no", "", "", "", "", "", "", - "automotive.museum", "", "", "", @@ -9399,6 +7670,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "from-wv.com", "", "", "", @@ -9407,19 +7679,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ge.it", "", "", - "aju.br", "", 
"", + "an.it", "", "", - "com.sl", "", "", + "nm.cn", "", "", - "com.ml", "", "", "", @@ -9433,21 +7705,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.be", - "gov.lc", "", - "ro.im", "", "", "", "", "", - "chungnam.kr", "", "", - "grane.no", "", "", + "edu.qa", "", "", "", @@ -9456,340 +7724,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cn.it", "", "", "", - "com.hr", "", "", "", "", - "edu.sl", "", "", "", "", - "edu.ml", - "", - "b\303\245tsfjord.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-ok.com", - "", - "", - "", - "", - "", - "", - "", - "", - "wildlife.museum", - "", - "", - "", - "", - "", - "from-nj.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "now-dns.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-il.com", - "audnedaln.no", - "", - "", - "", - "", - "", - "", - "", - "", - "from-ia.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-nv.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "british.museum", - "", - "", - "", - "berlin.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "st.no", - "", - "", - "", - "", - "b.se", - "", - "net.gg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asker.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "eidskog.no", - "", - "", - "", - "", - "ac.ug", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "yn.cn", - "", - "", - "", - "", - "", - "", - "", - "go.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gd.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "art.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "bc.ca", - "", - "net.hk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.qa", - "", - "", - "", - "", - "", - "", - "", - "", - "cc.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "go.ke", - "", - "", - "", - "", - "", - "", - "", - "romskog.no", - "", - "", - "", "", "", "", @@ -9799,7 +7745,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\303\245s.no", "", "", "", @@ -9824,15 +7769,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.ni", "", "", "", "", "", "", + "gov.ls", "", - "from-nh.com", "", "", "", @@ -9849,18 +7793,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "from-tx.com", "", "", "", "", - "zlg.br", "", "", + "en.it", "", "", "", "", + "br.it", "", "", "", @@ -9868,20 +7812,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "go.jp", "", "", "", "", + "aoste.it", "", + "fm.br", "", - "edu.ni", "", "", "", "", "", "", - "gloppen.no", "", "", "", @@ -9903,7 +7848,6 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "barsy.site", "", "", "", @@ -9913,38 +7857,100 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\303\245seral.no", "", "", + "nh.us", "", "", "", - "nom.cl", "", "", "", "", - "education.museum", "", "", "", "", "", - "asnes.no", "", "", "", "", - "university.museum", "", "", + "krellian.net", + "", + "", + "gop.pk", + "", + "gov.om", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.do", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "udine.it", + "", + "", + "", + "", + "", + "", + "", + "com.al", + "", + "", + "", + "", + "com.hn", + "", + "", + "", + "", + "gov.dm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fnd.br", "", "", "", "", "", - "nl.ca", - "forum.hu", "", "", "", @@ -9976,6 +7982,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ac.pr", "", "", "", @@ -9983,244 +7990,41 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "airguard.museum", - "", - "", - "", - "", - "", - "com.pl", - "", - "4lima.at", - "", - "", - "", - "", - "", - "", - "", - "", - "gop.pk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aosta.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ec", - "", - "", - "", - "", - "edu.pl", - "r\303\270yken.no", - "", - "", - "", - "", - "", - "", - "rennesoy.no", - "", - "biz.mw", - "", - "", - "", - "alesund.no", - "", - "", - "", - "castres.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.ni", - "", - "", - "", - "", - "", - "", - "", - "egyptian.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barsy.online", - "", - "", - "", - "com.hn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "adygeya.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "edu.al", "", "", "", "", "edu.hn", + "kommune.no", "", "", "", "", "", - "neat-url.com", "", "", - "from-mn.com", "", + "from-mo.com", "", + "keymachine.de", "", "", "", - "net.sl", "", "", "", "", - "net.ml", "", "", "", "", + "gr.it", "", "", - "for.sale", "", "", "", - "qa2.com", "", "", "", @@ -10230,7 +8034,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "*.fk", "", "", "", @@ -10239,33 +8042,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "artcenter.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nh.us", - "", - "", "", "", "natural.bo", @@ -10286,27 +8062,240 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nord-fron.no", + "", 
+ "", + "", + "", + "kr.it", + "", + "", + "eidskog.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ac.jp", + "", + "", + "", + "", + "", + "", + "", + "net.qa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "georgia.su", + "", + "", + "", + "", + "ju.mp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "getmyip.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ks.ua", + "", + "", + "", + "", + "", "gr.jp", "", "", - "atm.pl", + "enf.br", "", "", "", "", + "gov.la", "", "", "", "", "", - "newspaper.museum", "", "", "", "", - "nom.pl", "", - "*.np", "", "", "", @@ -10319,6 +8308,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "skedsmo.no", "", "", "", @@ -10328,8 +8318,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cultural.museum", - "bel.tr", "", "", "", @@ -10344,45 +8332,45 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "for.men", "", "", "", + "naustdal.no", "", "", "", "", - "co.pn", "", "", "", "", "", + "from-va.com", "", "", "", + "ag.it", + "from-ms.com", "", - "cq.cn", + "co.ro", "", "", "", - "udine.it", - "birdart.museum", "", "", "", "", - "coldwar.museum", + "gov.sl", "", "", "", "", - "biz.id", + "gov.ml", "", "", "", - "rv.ua", "", + "from-vt.com", "", "", "", @@ -10399,11 +8387,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "barsy.io", "", "", "", "", "", + "from-ks.com", "", "", "", @@ -10419,12 +8409,373 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nom.hn", "", + "net.al", "", "", "", - "biz.cy", + "", + "net.hn", + "", + "4lima.at", + "", + "", + "", + "", + "", + "gov.il", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "www.ro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gd.cn", + "", + "", + "", + "", + "barsy.site", + "", + "", + "", + "", + "", + "", + "", + "", + "a.se", + "", + "", + "", + "", + "", + "", + "gov.cl", + "", + "", + "", + "", + "", + "", + "", + "", + "bn.it", + "", + "", + "", + "", + "", + "", + "", + "", + "y.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "z.se", + "", + "", + "", + "", + "c.se", + "", + "nsn.us", + "", + "", + "", + "", + "bacninh.vn", + "", + "", + "x.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "123minsida.se", + "", + "", + "us.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aa.no", + "", + 
"co.gg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "e.se", + "", + "eid.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.gi", + "", + "", + "", + "yn.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\303\245s.no", + "", + "", + "", + "", + "", + "", + "", + "b\303\241jddar.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "za.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "u.se", + "fi.cr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-ok.com", + "", + "", + "", + "", + "eidfjord.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-nj.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-tx.com", + "", + "", + "", + "", + "cya.gg", + "", + "nt.ro", + "n.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-nh.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kustanai.ru", + "", + "", + "", + "", + "", + "", + "co.tj", + "", + "", + "", + "", + "ci.it", + "", + "", + "", + "", + "ac.cn", + "jp.net", "", "", "", @@ -10441,7 +8792,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jfk.museum", + "", + "", + "", + "from-nv.com", + "", + "", + "", + "ac.vn", + "now-dns.net", + "", + "", + "", + "co.bn", "", "", "", @@ -10450,11 +8813,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "com.ag", + "cechire.com", "", "", "", @@ -10469,7 +8828,174 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "azimuth.network", + "", + "", + "gov.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bykle.no", + "", + "", + "", + "", + "jevnaker.no", + "", + "edu.gl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fvg.it", + "", + "", + "", + "", + "", + "cs.it", + "", + "", + "", + "", + "", + "from-ca.com", + "", + "", + "", + "co.gl", + "bnr.la", + "", + "", + "", + "", + "", + "", + "", + "", + "adygeya.ru", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nj.us", + "from-ct.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.sh", + "", + "", + "", + "", + "biz.mw", + "", + "", + "", + "", + "", + "", + "", + "biz.id", + "", + "com.bh", + "", + "", + "", + "az.us", + "", + "", + "", + "", + "", + "", + "bounceme.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chungnam.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\303\245lg\303\245rd.no", + "", + "barsy.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nittedal.no", + "", + "", + "", + "", + "edu.bh", "fam.pk", "", "", @@ -10478,17 +9004,242 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fm.br", "", "", "", "", - "nf.ca", - "edu.gl", "", "", "", - "from-tn.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cci.fr", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "gov.nr", + "grp.lk", + "akamaihd.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "qa2.com", + "", + "", + "", + "evenassi.no", + "", + "", + "", + "", + "biz.my", + "", + "", + "", + "", + "b\303\241hccavuotna.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ca.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "now.sh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bg.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.tl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "al.no", + "", + "", + "", + "", + "", + "", + "", + "biz.cy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aid.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nom.nc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kep.tr", + "", + "", + "bryne.no", + "", + "b\303\245tsfjord.no", + "", + "arendal.no", "", "", "", @@ -10519,6 +9270,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.gl", "", "", "", @@ -10530,7 +9282,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cymru.museum", "", "", "", @@ -10545,20 +9296,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "*.fk", "", + "b.se", "", "", "", "", "", "", - "net.il", "", "", "", "", "", - "ao.it", "", "", "", @@ -10583,10 +9334,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "na.it", "", "", "", "", + "fetsund.no", + "quangnam.vn", "", "", "", @@ -10598,44 +9352,40 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.it", "", - "co.place", "", "", "", "", - "and\303\270y.no", "", "", "", + "from-ga.com", "", "", "", "", + "from-md.com", "", - "net.ni", "", "", "", "", "", "", - "barsy.de", "", "", - "from-va.com", "", "", "", "", - "from-ms.com", "", "", "", "", "", "", + "com.ph", "", "", "", @@ -10649,13 +9399,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "from-vt.com", "", + "net.sh", "", "", "", "", "", + "n\303\245\303\245mesjevuemie.no", "", "", "", @@ -10664,25 +9415,27 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.bh", "", "", "", "", "", - "eidfjord.no", "", "", "", "", - "naustdal.no", "", "", "", "", "", "", + "biz.pk", + "nl.no", "", "", + "gov.qa", "", "", "", @@ -10693,11 +9446,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.gg", "", "", "", "", + "edu.ph", "", "", "", @@ -10714,32 +9467,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nom.gl", "", "", "", "", "", "", - "\303\270ksnes.no", "", "", "", "", "", "", - "ud.it", "", "", "", "", "", - "nom.ag", + "norddal.no", "", + "autocode.dev", "", "", "", - "cci.fr", "", "", "", @@ -10750,6 +9500,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", 
"", + "\303\245l.no", "", "", "", @@ -10762,7 +9513,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cechire.com", + "g.se", "", "", "", @@ -10774,6 +9525,566 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bi.it", + "", + "", + "barsy.eu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "k.se", + "bitbucket.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\303\270ygarden.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "esp.br", + "", + "", + "", + "", + "", + "kristiansund.no", + "", + "", + "", + "", + "", + "", + "", + "bs.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ai.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.am", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "al.it", + "gov.al", + "", + "ghost.io", + "", + "", + "", + "", + "", + "", + "", + "grong.no", + "srv.br", + "", + "", + "", + "games.hu", + "", + "", + "aukra.no", + "", + "from-il.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-ia.com", + "", + "", + "", + "", + "", + "", + "", + "", + "cl.it", + "*.bd", + "berlev\303\245g.no", + "", + "", + "go.tj", + "", + "", + "", + "", + "", + "from-co.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "we.tc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "se.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "far.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "brescia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "emb.kw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "us.kg", + "ggf.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ph", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ba.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cdn77-ssl.net", + "", + "co.il", + "", + "", + "", + "", + "co.th", + "", + "", + "", + "", + "", + "", + "austevoll.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\303\245krehamn.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.ng", + "", + "", + "", + "komforb.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "consultant.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "biz.tt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bardu.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.ng", + "", + "", + "", + "", + "nesna.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nt.au", + "", + "", + "", + "", + "", + "", + "", + "ac.tj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "js.cn", "from-ar.com", "", "", @@ -10803,6 +10114,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.bj", "", "", "", @@ -10814,11 +10126,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.pl", "", + "soc.lk", "", "", - "ce.it", "", "", "", @@ -10828,18 +10139,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sa.cr", "", "", "", + "4lima.de", "", - "ac.im", - "gov.sg", "", "", "", "", - "gov.mg", "", "", "", @@ -10854,16 +10162,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.kg", "", "", "", "", - "nature.museum", + "blogsyte.com", "", "", "", - "at.it", "", "", "", @@ -10873,13 +10179,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nu.it", "", "", "", "", - "no.it", - "art.pl", "", "", "", @@ -10889,10 +10192,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "castle.museum", "", "", "", + "eu.org", "", "", "", @@ -10903,18 +10206,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ct.it", "", "", "", "", + "ae.org", "", "", "", "", + "net.th", "", "", "", + "nf.ca", "", "", "", @@ -10925,21 +10230,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "umb.it", "", "", "", "", - "net.hn", "", "", "", - "cartoonart.museum", "", "", + "fie.ee", "", "", "", "", + "biz.pr", "", "", "", @@ -10957,7 +10263,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "from-ks.com", "", "", "", @@ -10969,9 +10274,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cranbrook.museum", "", "", + "cpa.pro", "", "", "", @@ -10980,8 +10285,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "grp.lk", - "barsy.io", "", "", "", @@ -10989,12 +10292,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "far.br", "", "", "", "", - "aid.pl", "", "", "", @@ -11004,31 +10305,30 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "awsmppl.com", "", "", "", - "costume.museum", "", + "co.network", "", "", "", "", "", + "kh.ua", "", "", "", "", "", - "center.museum", "", - "ntdll.top", "", "", "", "", "", "", - "az.us", "", "", "", @@ -11045,16 +10345,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gsm.pl", + "b\303\245d\303\245ddj\303\245.no", "", + "elk.pl", "", "", + "barsy.online", "", "", "", "", "", + "ah.no", "", - "fortworth.museum", "", "", "", @@ -11083,14 +10387,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.pl", "", "", "", "", "", - "gausdal.no", "", "", + "gjerstad.no", + "seg.br", "", "", "", @@ -11104,12 +10410,10 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "com.pf", "", "", "", "", - "esp.br", "", "", "", @@ -11118,17 +10422,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cdn-edges.net", "", "", - "n\303\245\303\245mesjevuemie.no", "", "", "", - "gov.ac", "", "", "", - "js.cn", "", "", "", @@ -11139,22 +10441,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fin.ci", "", "", "", "", - "1kapp.com", "", "", "", "", - "cya.gg", "", "", "", "", - "edu.pf", "", "", "", @@ -11162,20 +10460,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "net.ng", "", - "ar.it", "", - "r\303\241hkker\303\241vju.no", "", "", "", "", "", "", - "newhampshire.museum", "", + "biz.mv", "", "", + "aostavalley.it", "", "", "", @@ -11187,23 +10485,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gol.no", "", "", - "from-ca.com", "", "", + "abr.it", "", - "consulting.aero", - "nittedal.no", "", "", "", - "from-in.com", "", "", "", "", - "cr.it", "", "", "", @@ -11214,8 +10509,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "from-ct.com", "", + "from-nd.com", "", "", "", @@ -11223,15 +10518,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "roros.no", - "net.gl", "", - "aoste.it", "", "", "", "", "", + "bl.it", + "quangtri.vn", "", "", "", @@ -11241,14 +10535,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "biz.at", "", "", "", - "net.ag", "", "", "", - "reg.dk", "", "", "", @@ -11257,6 +10550,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "aip.ee", "", "", "", @@ -11279,7 +10573,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "evenassi.no", "", "", "", @@ -11294,6 +10587,44 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "biz.tr", + "", + "", + "", + "", + "gda.pl", + "", + "", + "", + "", + "", + "", + "kustanai.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sd.us", + "", + "", + "", + "", "fed.us", "", "", @@ -11305,20 +10636,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "biz.pk", + "co.at", "", "", "", "", "", - "ac.jp", "", + "narviika.no", "", "", "", "", "", - "ac.cn", "", "", "", @@ -11329,27 +10659,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ggf.br", "", "", "", "", "", + "froland.no", "", "", + "gob.bo", "", "", "", - "andoy.no", "", + "aircraft.aero", "", - "ac.vn", "", + "barum.no", "", "", "", - "co.bn", "", + "k\303\241r\303\241\305\241johka.no", "", "", "", @@ -11357,7 +10688,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "artdeco.museum", "", "", "", @@ -11370,11 +10700,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.sh", "", "", "", "", + "sa.cr", "", + "barsy.in", "", "", "", @@ -11382,6 +10715,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gov.bh", "", "", "", @@ -11391,14 +10725,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + 
"from-mi.com", "", "", "", + "uz.ua", "", "", "", "", "", + "com.ni", "", "", "", @@ -11408,7 +10745,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.ke", + "from-wi.com", "", "", "", @@ -11418,318 +10755,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "navoi.su", "", "", "", "", "", - "co.gl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sar.it", - "", - "", - "\303\245mot.no", - "", - "", - "", - "", - "", - "", - "cagliari.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "culturalcenter.museum", - "", - "", - "", - "an.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\245lg\303\245rd.no", - "", - "", - "", - "", - "", - "", - "western.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cn.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "go.it", - "", - "", - "", - "", - "", - "", - "russia.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "srv.br", - "", - "", - "", - "", - "com.sh", - "", - "", - "", - "", - "", - "", - "", - "", - "sa.com", - "", - "", - "", - "", - "", - "com.bh", - "", - "", - "", - "en.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ci.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\347\275\221\347\273\234.hk", - "", - "", - "urn.arpa", - "", - "\347\266\262\347\273\234.hk", - "com.eg", - "", - "", - "", - "rodoy.no", - "", - "", - "", - "", - "\347\275\221\347\265\241.hk", - "", - "", - "", - "", - "\347\266\262\347\265\241.hk", - "", - "", - "", - "", - "", - "edu.bh", - "", - "", - "", - "\345\205\254\345\217\270.hk", - "", - "", - "", - "", - "\346\224\277\345\272\234.hk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.eg", - "", - "", - "", - "\344\270\252\344\272\272.hk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "uri.arpa", - "", - "settlers.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\245fjord.no", - "", - "", - "", - "from-ak.com", - "", - "", - "", - "ac.ir", - "", "", "", "", @@ -11769,6 +10800,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.ni", "", "", "", @@ -11783,24 +10815,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ge.it", "", "", "", "", + "forsand.no", "", "", "", + "even\303\241\305\241\305\241i.no", "", "", "", "", "", - "ro.it", "", - "sd.us", "", "", + "kvafjord.no", "", "", "", @@ -11811,7 +10843,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "rm.it", "", "", "", @@ -11829,13 +10860,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "biz.pr", "", "", "", "", "", - "cpa.pro", "", "", "", @@ -11863,8 +10892,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "entertainment.aero", "", "", 
+ "caa.li", "", "", "", @@ -11876,6 +10907,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gub.uy", "", "", "", @@ -11884,14 +10916,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "now.sh", "", "", "", + "ch.it", "", "", "", "", + "go.th", "", "", "", @@ -11901,16 +10934,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sar.it", "", "", "", "", "", + "nom.ni", "", "", "", "", - "co.am", "", "", "", @@ -11918,797 +10952,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "adygeya.ru", "", "", "", - "geekgalaxy.com", - "", - "", - "", - "r\303\246lingen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "contemporary.museum", - "", - "", - "", - "", - "", - "", - "", - "soc.lk", - "", - "", - "", - "wedeploy.me", - "gov.hk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bounceme.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "somna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "smola.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sorum.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "re.it", - "", - "", - "", - "", - "", - "", - "", - "gov.nr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ask\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.al", - "", - "barsy.eu", - "", - "", - "biz.ki", - "", - "", - "workisboring.com", - "", - "", - "", - "emb.kw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "*.bd", - "", - "", - "", - "", - "", - "", - "", - "", - "chimkent.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.al", - "", - "", - "", - "boston.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "web.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ind.kw", - "gr.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ph", - "", - "", - "aq.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "askim.no", - "", - "", - "id.ly", - "", - "", - "", - "", - "", - "", - "", - "edu.ph", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.na", - "", - "", - "", - "", - "waw.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sic.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.ec", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "se.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.al", - "", - "web.co", - "", - "", - "", - "s\303\274dtirol.it", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\245l.no", - "", - "", - "", - "", - "from-ga.com", - "", - "", - "", - "sc.ls", - "from-md.com", - "", - "", - "", - "", - "net.sh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "int.bo", - "", - "net.bh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fie.ee", - "", - "", - "", - "", - "net.eg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.sl", - "", - "", - "", - "", - "gov.ml", - "", - "", - "", - "", - "", - "", - "", - "int.mw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nfshost.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "web.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nt.au", - "", - "", - "", - "wedeploy.io", - "", - "", - "", - "", - "", - "yombo.me", - "", - "", - "", - "", - "", - "", - "", - "\303\245lesund.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jp.net", - "", - "clinton.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "suedtirol.it", - "author.aero", - "", - "", - "", - "", - "", - "", - "fetsund.no", - "", - "", - "", - "", - "", - "from-co.net", - "", - "", - "", - "", - "", - "galsa.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "int.co", - "", - "", - "", - "", - "", - "", - "com.gh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "eun.eg", - "", - "", - "", - "nj.us", - "computerhistory.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "av.it", - "", - "", - "", - "", - "bo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cs.it", - "", - "", - "", - "", - "", - "edu.gh", - "", - "", - "cargo.aero", - "", - "gov.cl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "int.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.tt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "westfalen.museum", "", "", "", "", + "jl.cn", "", "", "", @@ -12729,6 +10980,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "brand.se", "", "", + "ynh.fr", "", "", "", @@ -12741,7 +10993,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "rn.it", "", "", "", @@ -12758,14 +11009,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cc.hn", "", + "co.bb", "", "", + "zp.ua", + "biz.et", "", "", "", - "gjerstad.no", "", + "com.bj", "", "", "", @@ -12778,11 +11033,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "askoy.no", "", "", "", - "gov.il", "", "", "", @@ -12811,7 +11064,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ri.it", "", "", "", @@ -12824,18 +11076,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "b\303\241hccavuotna.no", "", "", "", - "co.tj", "", "", "", "", - "ca.it", "", "", + "edu.bj", "", "", "", 
@@ -12844,12 +11094,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.al", "", "", "", "", - "fin.tn", "", "", "", @@ -12871,17 +11119,346 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "barsy.in", "", "", "", "", "", "", + "com.gh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-la.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sc.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.ph", + "", + "", + "", + "\303\245seral.no", + "from-ri.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.gh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ni", + "", + "", + "", + "", + "", + "", + "", + "biz.in", + "", + "", + "", + "urn.arpa", + "", + "ac.il", + "", + "", + "", + "gob.pk", + "ac.th", + "", + "", + "", + "", + "", + "fhv.se", + "", + "", + "", + "", + "from-ak.com", + "", + "belem.br", + "", + "", + "", + "", + "ac.pa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "consulting.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nissedal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "g\303\274nstigliefern.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "forte.id", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jampa.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gob.pe", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ngo.ph", + "", + "", + "", + "", + "", + "", + "", + "", + "wy.us", + "", + "kragero.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.menu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sic.it", + "com.na", + "", "", "", - "bulsan.it", - "gob.bo", "", "", "", @@ -12927,6 +11504,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "co.id", + "net.bj", "", "", "", @@ -12938,7 +11517,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "bt.it", "", "", "", @@ -12958,404 +11536,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fedje.no", "", "", "", "", "", "", - "nannestad.no", - "", - "", - "", - "", - "clock.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ph", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "umb.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aip.ee", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"gsm.pl", - "", - "", - "", - "", - "gov.pl", - "", - "", - "", - "\303\245krehamn.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "siellak.no", - "", - "", - "seoul.kr", - "jevnaker.no", - "", - "", - "", - "", - "", - "", - "", - "", - "gub.uy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "na.it", - "", - "", - "", - "", - "", - "", - "", - "web.gu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rad\303\270y.no", - "com.af", - "", - "", - "", - "", - "", - "cuneo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-mi.com", - "", - "", - "", - "", - "", - "", - "", - "aknoluokta.no", - "", - "", - "", - "", - "", - "", - "edu.af", - "", - "", - "", - "", - "from-wi.com", - "", - "", - "", - "", - "", - "", - "", - "", - "civilwar.museum", - "", - "", - "", - "aostavalley.it", - "", - "", - "", - "", - "", - "", - "", - "forde.no", - "", - "", - "", - "", - "", - "", - "", - "", - "gda.pl", - "", - "", - "", - "", - "", - "", - "reklam.hu", - "cremona.it", - "", - "", - "", - "", - "", - "resistance.museum", - "", - "", - "", - "", - "br.it", - "", - "", - "", - "", - "", - "", - "", - "", - "celtic.museum", - "", - "nissedal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.at", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "brescia.it", - "", - "", - "", - "", - "", - "", - "naamesjevuemie.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.tr", - "", - "", - "", - "", - "from-nd.com", - "", - "", - "ind.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.nc", - "", - "", - "", - "", - "jorpeland.no", - "", - "", - "", - "", - "gol.no", - "", "", "", "", @@ -13381,2540 +11567,35 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "co.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "web.pk", - "", - "", - "", - "", - "", - "", - "nom.af", - "", - "", - "", - "", - "in.ni", - "", - "narvik.no", - "", - "", - "abr.it", - "", - "", - "", - "", - "", - "", - "", - "emiliaromagna.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ski.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gamvik.no", - "", - "", - "", - "", - "", - "", - "", - "", - "b\303\270mlo.no", - "", - "", - "", - "gov.bf", - "", - "", - "", - "", - "co.ao", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "naroy.no", - "", - "", - "bygland.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bn.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "int.pt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ed.ao", - "naturalsciences.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "navuotna.no", - "enonic.io", - "", - "", - "belem.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "googlecode.com", - "bi.it", - "", - "", - "", - "", - "", - "", - "", - "4lima.de", - "", - "", - "", - "", - "", - "barletta-trani-andria.it", - "", - "", - "", - "", - "", - "", - "", - "*.fj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jl.cn", - "", - "", - "", - "", - "", - "from-fl.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-dc.com", - "", - "", - "ind.gt", - "", - "", - "", - "", - "", - "", - "", - "b\303\246rum.no", - "", - "", - "", - "", - "", - "newyork.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "berlevag.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sochi.su", - "", - "", - "web.lk", - "ruovat.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-ri.com", - "", - "", - "fvg.it", - "", - "", - "", - "", + "barsy.me", "", "", "com.bz", "", - "", + "g\303\241\305\213gaviika.no", "", "", "com.kz", "", "", - "", - "co.il", + "egersund.no", "", "", "", "", "", "", - "nsupdate.info", + "", + "", + "", + "", + "kommunalforbund.se", "", "", "", "", "", - "", - "amusement.aero", - "go.tj", "com.uz", - "journalism.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.mz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.bz", - "", - "", - "", - "", - "edu.kz", - "avocat.fr", - "", - "", - "", - "", - "svn-repos.de", - "", - "", - "", - "co.network", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "store.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "radoy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ragusa.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "boomla.net", - "", - "co.th", - "", - "", - "gob.pe", - "", - "", - "", - "baths.museum", - "", - "its.me", - "", - "net.af", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-la.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "grozny.ru", - "", - "", - "", - "bruxelles.museum", - "neues.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "council.aero", - "", - "", - "", - "", - "", - "risor.no", - "", - "", - "", - "", - "", - "", - "", - "", - "control.aero", - "", - "", - "", - "", - "", - "int.lk", - "", - "", - "", - "", - "uz.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "architecture.museum", - "", - "", - "", - "", - "", - "ra.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "etnedal.no", - 
"", - "frana.no", - "", - "", - "", - "corporation.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sortland.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "giize.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.financial", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nym.bz", - "", - "", - "", - "", - "nym.kz", - "wa.au", - "", - "culture.museum", - "", - "biz.mv", - "", - "", - "", - "", - "nordkapp.no", - "", - "", - "gob.pk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "contemporaryart.museum", - "", - "", - "barsy.menu", - "", - "", - "", - "", - "", - "", - "cloud66.zone", - "", - "", - "", - "", - "\303\270ygarden.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barsy.bg", - "n\303\241vuotna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "garden.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\241lt\303\241.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-de.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "austevoll.no", - "", - "", - "", - "", - "co.at", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "giske.no", - "", - "", - "", - "", - "", - "feira.br", - "", - "", - "", - "", - "", - "", - "journalist.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ind.in", - "", - "", - "", - "", - "", - "com.vu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "f.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "go.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ag.it", - "", - "", - "edu.vu", - "", - "", - "", - "", - "", - "", - "ah.cn", - "", - "", - "", - "", - "qh.cn", - "", - "", - "", - "", - "", - "", - "", - "store.ve", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-sd.com", - "", - "barsy.me", - "", - "", - "", - "", - "", - "", - "", - "", - "sauda.no", - "", - "", - "", - "", - "", - "", - "", - "sc.us", - "", - "", - "", - "cistron.nl", - "", - "", - "", - "rc.it", - "", - "", - "biz.et", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bushey.museum", - "", - "", - "", - "", - "net.th", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sf.no", - "ris\303\270r.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "suisse.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gob.gt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.tl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.bb", - "", - "", - "", - "", - "gb.com", - "", - "", - "", - "net.mz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.bz", - 
"", - "", - "", - "", - "net.kz", - "", - "", - "", - "", - "", - "", - "", - "campidanomedio.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.uz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.sh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.bh", - "", - "", - "", - "grondar.za", - "", - "", - "", - "int.ci", - "sm.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-nc.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.eg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "b\303\241hcavuotna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bs.it", - "", - "", - "revista.bo", - "", - "rovigo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "quebec.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fuettertdasnetz.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "educational.museum", - "", - "", - "", - "", - "", - "wallonie.museum", - "", - "", - "", - "", - "", - "zp.ua", - "", - "", - "", - "aerobatic.aero", - "", - "brindisi.it", - "", - "", - "", - "", - "", - "", - "", - "", - "radio.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ve", - "", - "", - "navoi.su", - "", - "", - "", - "", - "", - "\303\270rsta.no", - "", - "", - "", - "", - "", - "", - "", - "", - "cinema.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "forl\303\254cesena.it", - "", - "", - "", - "", - "", - "alaska.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.ve", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "armenia.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ba.it", - "", - "", - "", - "rel.ht", - "", - "", - "", - "", - "", - "go.th", - "", - "", - "", - "", - "farsund.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "entomology.museum", - "", - "", - "", - "foz.br", - "", - "", - "", - "", - "", - "rockart.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "froland.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "elk.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fet.no", - "", - "", - "", - "", - "", - "", - "", - "int.tt", - "", - "biz.ls", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ardal.no", - "", - "gov.al", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "s.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "norfolk.museum", - "", - "", - "", - "", - "net.vu", - "", - "", - "", - "", - "", - "", - "", - "", - "balsan.it", - "", - "", - "", - "", - "", - "", - "", - "", - "fhv.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "building.museum", - "", - "", - "", - "", - "", - "", - "", - "for.mom", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "al.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aquila.it", - "", - "", - "", - "", - "from-ne.com", - "", - "", - "", - "", - "", - "", - "aland.fi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cl.it", - "", - "", - "", - "", - "", - "gov.ph", - "", - "", - "", - "", - "", - "usantiques.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "svalbard.no", - "", - "", - "", - "", - "station.museum", - "", - "rhcloud.com", - "", - "", - "", - "", - "id.us", - "", - "", - "sec.ps", - "", - "", - "", - "forsand.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "campinagrande.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rochester.museum", - "", - "", - "", - "", - "alabama.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "usdecorativearts.museum", - "", - "", - "", - "", - "", - "idv.tw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "carrd.co", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "web.tr", - "", - "", - "", - "", - "sytes.net", - "", - "", - "com.kp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.ng", - "grozny.su", - "", - "", - "", - "", - "", - "", - "nieruchomosci.pl", - "ac.tj", - "", - "", - "", - "scotland.museum", - "", - "", - "", - "sokndal.no", - "", - "", - "", - "", - "", - "england.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "snasa.no", - "", - "", - "ringsaker.no", - "edu.kp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.ng", - "", - "", - "\347\266\262\350\267\257.tw", - "", - "", - "", - "", - "", - "xz.cn", - "", - "", - "", - "", - "", - "", - "wielun.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jamison.museum", - "", - "", - "", - "", - "", - "fredrikstad.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sor-odal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogsyte.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ve", - "", - "", - "", - "fermo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "brasil.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "berlev\303\245g.no", - "rg.it", - "", - "", - "", - "eastafrica.museum", - "", - "", - "", - "averoy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.gh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bjark\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "fl.us", - "", - "sampa.br", - "", - "", - "", - "", - "andebu.no", - "", - "", - "", - "e12.ve", - "", - "", - "", - "", - "", - "barlettatraniandria.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "avocat.pro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "niteroi.br", - "", - "", - "jab.br", - "", - "andriabarlettatrani.it", - "", - "", - "abc.br", - "", - "", - "amsterdam.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-sc.com", - "", - "", - "", - "barsy.info", - "", - "", - "", - "", - "", - "eu.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dr.tr", - "", - "", - "", - "adv.mz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "odo.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "alessandria.it", - "", - "", - "", - "", - "", - "", - "", - "", - "bievat.no", - "", - "", - "sc.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.dk", - "", - "", - "", - "", - "", - "", - "", - "", - "aeroport.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "s\303\270r-fron.no", - "", - "", - "", - "", - "", - "", - "", - "", - "ae.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\345\222\214\346\255\214\345\261\261.jp", - "", - "", - "", - "bhz.br", "", "", "", @@ -15927,34 +11608,39 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "co.ae", "", + "control.aero", "", "", "", "", "", + "g\303\274nstigbestellen.de", "", "", - "community.museum", "", "", "", + "askvoll.no", "", "", "", "", + "edu.mz", "", "", + "ngrok.dev", "", "", "", - "futurehosting.at", "", "", "", + "edu.bz", "", "", "", "", + "edu.kz", "", "", "", @@ -15966,51 +11652,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "naklo.pl", - "", - "", - "", - "", - "ac.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barsy.ca", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jambyl.su", - "", - "", - "", - "sch.so", - "", "cbg.ru", "", "", @@ -16019,7 +11660,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "usarts.museum", + "from-sd.com", "", "", "", @@ -16028,21 +11669,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sunndal.no", "", - "gaular.no", "", "", "", "", "", "", - "rel.pl", "", - "for.one", "", "", - "ind.tn", "", "", "", @@ 
-16054,19 +11690,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kvalsund.no", "", "", "", "", "", - "engerdal.no", "", + "fm.no", + "seoul.kr", "", "", "", "", "", - "skanland.no", "", "", "", @@ -16075,1257 +11712,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "consulado.st", - "", - "", - "", - "", - "", - "or.cr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gmina.pl", - "", - "", - "", - "", - "agriculture.museum", - "", - "", - "", - "", - "", - "", - "", - "namsos.no", - "", - "", - "", - "", - "bauern.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-pr.com", - "", - "or.mu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-pa.com", - "", - "", - "", - "campania.it", - "gov.af", - "", - "in.rs", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rennes\303\270y.no", - "", - "de.com", - "in.us", - "", - "", - "", - "", - "", - "own.pm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.ng", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "crafts.museum", - "", - "", - "", - "", - "", - "", - "com.vn", - "int.ar", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sd.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ngo.ph", - "", - "edu.vn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ch.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "a\303\251roport.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ekloges.cy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stavern.no", - "", - "", - "", - "", - "", - "", - "sauherad.no", - "", - "", - "", - "", - "appspot.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.vi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.il", - "", - "", - "", - "sch.id", - "", - "", - "youth.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.az", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.gp", - "artgallery.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "s\303\270rfold.no", - "", - "", - "", - "rygge.no", - "", - "", - "", - "", - "bg.it", - "edu.az", - "glass.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.gp", - "", - "", - "stalbans.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gz.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "conference.aero", - "workers.dev", - "", - "", - "", - "", - "", - "r\303\270yrvik.no", - "", - "in.net", - "", - "eu.int", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "brunel.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.th", - "", - "", - "", - "", - "", - "", - "", - "", - "int.mv", - "", - "", - "", - "g\303\241\305\213gaviika.no", - "", - "", 
- "surnadal.no", - "codespot.com", - "", - "", - "", - "or.ci", - "", - "", - "", - "", - "", - "", - "store.st", - "r\303\270ros.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nesset.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gv.ao", - "", - "", - "", - "", - "natuurwetenschappen.museum", - "", - "", - "", - "", - "b.br", - "", - "", - "", - "", - "stuttgart.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "skierva.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barsy.pro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ski.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "astronomy.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "newjersey.museum", - "", - "", - "", - "", - "", - "", - "sandcats.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sk\303\241nit.no", - "", - "", - "", - "", - "", - "", - "", - "", - "ulvik.no", - "", - "", - "", - "", - "\345\225\206\346\245\255.tw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "does-it.net", - "", - "", - "", - "", - "gob.ar", - "", - "", - "", - "co.ag", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ab.ca", - "", - "", - "", - "", - "", - "", - "", - "xj.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudns.pw", - "", - "", - "", - "", - "", - "co.in", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.mz", - "", - "siena.it", - "", - "", - "", - "s\303\270r-odal.no", - "balestrand.no", - "", - "", - "gov.bz", - "", - "", - "", - "", - "gov.kz", - "", - "", - "", - "", - "", - "", - "", - "", - "zj.cn", - "", - "gratangen.no", - "", - "", - "gdansk.pl", - "", - "austrheim.no", - "", - "ushuaia.museum", - "", - "", - "", - "", - "", - "", - "", - "int.is", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "selbu.no", - "", - "", - "", - "", - "stord.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ac.at", - "", - "", - "", - "air-surveillance.aero", - "", - "", - "", - "", - "floro.no", - "", - "", - "", - "fastpanel.direct", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fribourg.museum", - "gdynia.pl", - "", - "", - "", - "", - "", - "", - "jpn.com", - "freeboxos.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "agrar.hu", - "", - "", - "", - "or.pw", - "in.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "abo.pa", - "", - "", - "", - "", - "", - "", - "fh.se", - "", - "sogne.no", - "", - "", - "", - "sydney.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "undersea.museum", - "", - "", - "", - "", - "", - "jessheim.no", - "florida.museum", - "", - "chicago.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "b\303\241id\303\241r.no", - "", - "", - "", - "ac.gn", - "", - "", - "gob.sv", - "", - "", - 
"", - "", - "net.vn", - "", - "", - "", - "", - "wlocl.pl", - "", - "", - "", - "gildeskal.no", - "", - "", - "", - "", - "", - "", - "bielawa.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nb.ca", - "", - "", - "", - "cherkasy.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "flesberg.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-nm.com", - "", - "", - "", - "", - "", - "seaport.museum", - "", - "barsy.org", - "", - "", - "servemp3.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ngrok.io", - "", - "natal.br", - "sor-fron.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edugit.org", - "", - "", - "", - "gjesdal.no", - "", - "", - "", - "", - "", - "", - "catering.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "utazas.hu", - "", - "", - "", - "sch.ly", - "", - "net.vi", - "agents.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sn.cn", - "", - "", - "", - "", - "frosinone.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.az", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.gp", - "", - "", - "", - "", - "", - "", - "", - "", - "sc.ug", - "", - "", - "", - "", - "", - "", - "", - "bsb.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "*.pg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bl.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jewishart.museum", - "", - "", - "", - "", - "", - "north.museum", - "web.do", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gb.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudns.pro", - "", - "chieti.it", - "", - "", - "", - "", - "gv.at", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "slg.br", - "", "", "", "belluno.it", @@ -17358,6 +11744,481 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "futurehosting.at", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aknoluokta.no", + "", + "gov.ng", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ah.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\345\222\214\346\255\214\345\261\261.jp", + "", + "", + "", + "", + "", + "", + "store.ro", + "", + "", + "", + "", + "", + "", + "com.tj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "qh.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.tj", + "", + "", + "", + "", + "from-de.com", + "", + "", + "", + "", + "", + "", + "", + "", + "ac.fj", + "", + "", + "", + "", + "", + "grane.no", + "", + "", + "", + "", + "", + "", + "abc.br", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "sochi.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bhz.br", + "ac.at", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "shw.io", + "sc.us", + "", + "", + "", + "we.bs", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.iq", + "", + "", + "", + "", + "", + "ngo.ng", + "", + "", + "", + "", + "", + "", + "", + "cagliari.it", + "", + "", + "amscompute.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ngrok.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.zm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.mz", + "", + "", + "", + "edu.iq", + "", + "", + "", + "", + "", + "net.bz", + "", + "", + "", + "", + "net.kz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "farsund.no", + "", + "", + "", + "", + "", + "net.uz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nuoro.it", + "", + "", + "edu.zm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wv.us", + "", + "", + "", + "", + "", + "eu.int", + "", + "krager\303\270.no", + "", + "", + "", + "", + "", + "", + "fl.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "agrar.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "azimuth.network", + "", + "", + "", + "", + "", + "", + "novecore.site", + "", + "", + "", + "nannestad.no", + "", + "", + "", + "", + "", + "", + "store.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aarborte.no", + "", + "", + "", + "", + "", + "", + "", + "kristiansand.no", + "", + "", + "", + "", + "", + "", + "", "fm.it", "", "", @@ -17365,166 +12226,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "redirectme.net", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "cz.it", - "sld.pa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "inf.cu", - "", - "", - "", - "ballooning.aero", - "", - "", - "", - "", - "", - "", - "stordal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sld.do", - "", - "", - "", - "", - "", - "\303\245snes.no", - "", - "", - "", - "", - "", - "ia.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oy.lc", - "", - "dynv6.net", - "", + "sm.ua", "", "", "", @@ -17533,16 +12239,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cz.it", "", "", - "settlement.museum", + "", + "", + "", + "fet.no", "", "", "", "", "", "", - "skierv\303\241.no", + "", + "", + "", + "\303\270yer.no", "", "", "", @@ -17558,16 +12271,69 
@@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "riopreto.br", "", + "gob.sv", + "", + "elementor.cloud", + "", + "", + "uri.arpa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fh.se", + "", + "", + "", + "", + "builtwithdark.com", + "net.tj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.org", + "", + "", + "", + "", + "", + "", + "adv.mz", "", "", "", "fe.it", + "fin.tn", "", "", "", - "sch.ir", "", "", "", @@ -17577,6 +12343,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "n\303\241vuotna.no", "", "", "", @@ -17585,19 +12352,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "recht.pro", "", "", "", "", "", + "fuettertdasnetz.de", "", "", "", "", "", "", - "gangwon.kr", "", "", "", @@ -17616,24 +12382,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "aq.it", "", "", "", "", - "\303\245rdal.no", "", "", "", "", "", "", - "rendalen.no", "", "", "", "", "", "", + "co.financial", "", "", "", @@ -17641,16 +12407,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "svn-repos.de", "", + "gausdal.no", "", - "nalchik.su", "", - "sch.lk", "", - "brasilia.me", "", "", - "creation.museum", "", "", "", @@ -17661,17 +12425,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "eco.bj", "", "", "", "", - "bamble.no", "", "", "", + "jotelulu.cloud", "", "", + "gv.at", "", + "*.pg", "", "", "", @@ -17683,21 +12450,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "foz.br", + "cloud66.zone", "", "", "", + "com.kp", "", "", "", "", + "flesberg.no", "", - "emergency.aero", "", + "sonla.vn", "", "", "", "", "", + "net.iq", "", "", "", @@ -17722,6 +12494,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "go.id", + "com.pf", "", "", "", @@ -17742,10 +12516,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "crotone.it", "", "", + "net.zm", "", + "edu.kp", "", "", "", @@ -17764,6 +12539,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kasserver.com", "", "", "", @@ -17771,7 +12547,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "genoa.it", "", "", "", @@ -17780,19 +12555,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "soundcast.me", + "bjark\303\270y.no", "", "", "", "", + "edu.pf", "", "", "", "", "", - "evenes.no", - "gov.ve", - "int.la", "", "", "", @@ -17805,72 +12578,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fjell.no", "", + "barueri.br", "", "", "", "", "", - "", - "bedzin.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "snillfjord.no", - "", - "", - "", - "", - "software.aero", - "", - "", "us.org", - "flora.no", "", "", "", @@ -17891,10 +12606,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - 
"rakkestad.no", - "baltimore.museum", "", "", + "council.aero", "", "", "", @@ -17908,7 +12622,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "inf.mk", "", "", "", @@ -17926,233 +12639,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gob.pa", - "", - "fr.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "za.org", - "", - "", - "", - "", - "fin.ec", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "freemasonry.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "government.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudera.site", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gob.do", - "cloudfront.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vet.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.nf", - "", - "", - "", - "so.it", - "", - "", - "", - "", - "", - "js.org", - "dr\303\270bak.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gaivuotna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "ap.it", "", "", @@ -18174,7 +12660,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sweden.museum", + "sc.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "fr.it", "", "", "", @@ -18190,14 +12685,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "saotome.st", + "za.org", "", "", "", @@ -18225,7 +12713,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "servecounterstrike.com", "", "", "", @@ -18235,6 +12722,475 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "bomlo.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "js.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ac.id", + "", + "", + "", + "", + "", + "", + "", + "gb.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "am.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "certmgr.org", + "", + "", + "", + "co.in", + "", + "", + "", + "", + "zj.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "xj.cn", + "", + "", + 
"feira.br", + "", + "", + "", + "edgestack.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "avellino.it", + "", + "com.az", + "", + "", + "", + "", + "gov.gh", + "ac.ae", + "nog.community", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gob.ar", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "store.st", + "", + "edu.az", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\303\245mot.no", + "", + "", + "", + "", + "", + "", + "", + "", + "bz.it", + "", + "", + "ekloges.cy", + "", + "", + "jessheim.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-ne.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "s\303\274dtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "uk.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ac.gn", + "appspot.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "col.ng", + "", + "", + "", + "", + "", + "", + "", + "", + "xz.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "suedtirol.it", + "", + "", + "", + "", + "", + "", + "", + "nordkapp.no", + "", + "", + "", + "", + "", "blogsite.org", "", "", @@ -18244,6 +13200,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "b.br", + "sch.so", + "gob.gt", "", "", "", @@ -18253,13 +13212,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fundacio.museum", "", - "airport.aero", "", "", "", - "\345\261\261\345\217\243.jp", "", "", "", @@ -18269,112 +13225,3487 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\345\257\214\345\261\261.jp", "", "", "", "", - "\345\262\241\345\261\261.jp", "", "", "", "", - "\345\261\261\346\242\250.jp", "", "", "", - "zhitomir.ua", - "\345\263\266\346\240\271.jp", "", "", - "judaica.museum", "", - "\345\261\261\345\275\242.jp", "", "", "", "", - "\344\272\254\351\203\275.jp", "", "", "", "", - "\346\235\261\344\272\254.jp", "", "", "", "", - "\345\276\263\345\263\266.jp", - "barsy.support", "", "", "", - "\347\276\244\351\246\254.jp", "", "", "", - "selje.no", - "\351\263\245\345\217\226.jp", "", "", "", "", - "jefferson.museum", "", "", "", "", - "\345\205\265\345\272\253.jp", - "baidar.no", "", "", "", - "\345\256\256\345\264\216.jp", "", "", "", "", - "\345\262\251\346\211\213.jp", + "", + "", + "", + "biz.ki", + "*.np", + "", + "", + "", + "", + "", + "", + "", + "sch.id", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "co.pn", + "", + "", + "", + "", + "", + "", + "", + "app.br", + "", + "cq.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.mz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.bz", + "", + "", + "", + "", + "gov.kz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.af", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "consulado.st", + "", + "geekgalaxy.com", + "", + "net.az", + "", + "", + "", + "", + "", + "", + "", + "from-mn.com", + "er.in", + "", + "", + "", + "", + "", + "", + "", + "", + "amot.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gratangen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "from-fl.com", + "", + "gmina.pl", + "", + "", + "edu.af", + "", + "", + "", + "", + "from-dc.com", + "", + "", + "", + "", + "", + "", + "", + "emiliaromagna.it", + "", + "", + "slg.br", + "h\303\241bmer.no", + "brasilia.me", + "", + "", + "", + "genoa.it", + "", + "", + "biz.ss", + "", + "", + "", + "", + "", + "", + "", + "", + "b\303\241hcavuotna.no", + "", + "", + "jab.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bergamo.it", + "", + "", + "", + "", + "wi.us", + "", + "", + "", + "arts.ro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "st.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ab.ca", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fc.it", + "", + "", + "", + "from-tn.com", + "", + "", + "adobeaemcloud.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "keliweb.cloud", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bib.br", + "", + "", + "", + "", + "", + "", + "conference.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.tj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-pr.com", + "", + "", + "", + "", + "", + "", + "", + "", + "bj.cn", + "", + "", + "", + "", + "", + "from-pa.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servemp3.com", + "", + "", + "", + "", + "", + "", + "", + "sc.ug", + "", + "", + "", + "jp.md", + "", + "co.ag", + "", + "", + "", + "", + "", + "bsb.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "surnadal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "fin.ec", + "", + "", + "fedje.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "engerdal.no", + "", + "", + "", + "", + "fin.ci", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "karelia.su", "", "", "", "", - "\345\256\256\345\237\216.jp", - "naples.it", "", "", "", - "\351\225\267\345\264\216.jp", - "jondal.no", "", "", "", - "\347\247\213\347\224\260.jp", "", "", "store.dk", "", - "\347\237\263\345\267\235.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.iq", + "yombo.me", + "", + "com.vu", + "", + "", + "", + "ntdll.top", + "", + "", + "", + "", + "nb.ca", + "", + "", + "", + "", + "", + "", + "askim.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.zm", + "jp.kg", + "", + "", + "", + "", + "", + "gov.bf", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.vu", + "", + "", + "", + "", + "", + "kvanangen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.zw", + "", + "net.af", + "", + "", + "", + "wa.us", + "", + "adobeaemcloud.net", + "", + "", + "", + "", + "", + "", + "", + "", + "so.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bolzano.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "b\303\241id\303\241r.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sc.ke", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kvinesdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "namdalseid.no", + "", + "", + "biz.ua", + "", + "", + "", + "cn.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fg.it", + "", + "", + "com.ve", + "", + "", + "", + "jpn.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kv\303\246fjord.no", + "", + "", + "", + "", + "karacol.su", + "", + "", + "sec.ps", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edu.ve", + "barsy.pro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hu.net", + "", + "", + "", + "", + "", + "", + "forde.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.technology", + "", + "", + "", + "gz.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "med.sd", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "f.se", + "ac.in", + "", + "", + "", + "", + "", + "", + "", + "", + "me.vu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "conn.uk", + "", + "", + "", + "", + "", + "", + "", + "sch.ir", + "", + "", + "", + "googlecode.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "empresa.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nom.ve", + "", + "", + "gov.kp", + "", + "", + "", + "", + "", + "bremanger.no", + "klabu.no", + "", + "", + "", + "codespot.com", + "", + "", + "", + "", + "", + "net.vu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bokn.no", + "", + "muos\303\241t.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "works.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jorpeland.no", + "galsa.no", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "andoy.no", + "", + "", + "", + "niteroi.br", + "\303\245fjord.no", + "", + "", + "", + "", + "sr.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "zgora.pl", + "", + "", + "aeroport.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "caobang.vn", + "", + "", + "", + "arts.co", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ws.na", + "", "", "", "", "fi.it", - "\351\235\231\345\262\241.jp", "", "", "", "", - "\351\246\231\345\267\235.jp", "", "", "", "", - "com.tj", "", "", "", "", - "bilbao.museum", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asker.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.nl", + "", + "sch.lk", + "", + "", + "", + "", + "", + "", + "", + "servecounterstrike.com", + "", + "bbs.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "n\303\246r\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sch.ly", + "", + "", + "", + "", + "", + "", + "", + "", + "from-nc.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "biz.ls", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.gp", + "sorum.no", + "", + "", + "", + "", + "", + "", + "elementor.cool", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "svalbard.no", + "jgora.pl", + "", + "", + "bluebite.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sv.it", + "", + "", + "", + "6g.in", + "gov.az", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "quangninh.vn", + "", + "", + "", + "", + "", + "", + "", + "5g.in", + "edu.gp", + "", + "", + "siena.it", + "", + "", + "", + "", + "", + "camau.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "somna.no", + "cremona.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "smola.no", + "", + "", + "", + "", + "", + "", + "", + "", + "siellak.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudns.cc", + "sd.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-nm.com", + "sch.ae", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dr.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "natal.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "biz.dk", + "", + "", + "", + "", + "", + "adobeioruntime.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "serveirc.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gjemnes.no", + "", + "", + "", + "", + "", + "", + "", + "freeboxos.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "det.br", + "", + "agdenes.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "me.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ai.in", + "", + "", + "", + "", + "", + "", + "krokstadelva.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "armenia.su", + "", + "", + "", + "", + "", + "", + "", + "bearalv\303\241hki.no", + "", + "", + "from-hi.com", + "", + "", + "", + "med.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "authgear-staging.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nalchik.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.gp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.af", + "", + "", + "", + "", + "", + "", + "", + "", + "cs.in", + "", + "", + "gob.ec", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "broke-it.net", + "", + "", + "", + "", + "sjc.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "qcx.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gildesk\303\245l.no", + "", + "", + "", + "", + "abo.pa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "editorx.io", + "", + "", + "", + "", + "aver\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "senseering.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "us.in", + "blogspot.md", + "", + "", + "", + "", + "", + "", + "", + "nyan.to", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.fj", + "", + "", + "", + "", + "", + "s\303\270rfold.no", + "adobeio-static.net", + "", + 
"gjesdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "nieruchomosci.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asnes.no", + "", + "balestrand.no", + "", + "", + "c.la", + "", + "", + "", + "", + "", + "", + "s\303\270r-odal.no", + "", + "", + "", + "blogspot.dk", + "", + "", + "", + "", + "blogspot.mk", + "", + "", + "", + "", + "", + "", + "", + "ca.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "avocats.bj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.td", + "", + "", + "", + "", + "kalmykia.ru", + "", + "", + "", + "\346\224\277\345\272\234.\351\246\231\346\270\257", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\347\266\262\347\265\241.\351\246\231\346\270\257", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "med.ly", + "", + "", + "", + "kiengiang.vn", + "co.ao", + "", + "", + "", + "", + "", + "blogspot.my", + "", + "", + "", + "", + "blogspot.tw", + "", + "", + "askoy.no", + "", + "med.pro", + "", + "", + "", + "\345\205\254\345\217\270.\351\246\231\346\270\257", + "", + "", + "", + "", + "", + "", + "", + "", + "giize.com", + "", + "blogspot.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nic.tj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "br\303\270nn\303\270y.no", + "", + "me.tc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-in.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ed.ao", + "", + "sor-fron.no", + "", + "", + "", + "", + "", + "dev.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ulvik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cherkasy.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "czest.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "biz.pl", + "", + "", + "\327\246\327\224\327\234.\327\231\327\251\327\250\327\220\327\234", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "spdns.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.ru", + "", + "", + "", + "", + "from-sc.com", + "", + "", + "", + "", + "blogspot.ro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gildeskal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sauherad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "a.bg", + "", + "", + "", + "", + "2.bg", + "", + "", + "", + "", + "4.bg", + "", + "", + "", + "", + "3.bg", + "", + "", + "", + "", + "1.bg", + "", + "", + "", + "", + "y.bg", + "", + "gov.ve", + "", + "", + "0.bg", + "cafjs.com", + "", + "", + "", + "z.bg", + "", + "", + "", + "", + "c.bg", + "", + "", + "", + "s.se", + "6.bg", + "", + "", + "", + "", + "x.bg", + "", + "", + "", + "", + "9.bg", + "", + "", + "", + "", + "net.fj", + "", + "", + "", + "", + "5.bg", + 
"", + "", + "", + "", + "8.bg", + "", + "", + "", + "", + "q.bg", + "", + "", + "eek.jp", + "", + "7.bg", + "batsfjord.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servers.run", + "", + "", + "ski.no", + "", + "austrheim.no", + "", + "", + "", + "", + "", + "", + "", + "", + "biz.nr", + "", + "", + "", + "", + "", + "", + "cargo.aero", + "", + "kicks-ass.net", + "e.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ybo.science", + "", + "", + "", + "", + "", + "", + "", + "", + "gentapps.com", + "", + "sk\303\241nit.no", + "", + "sn.cn", + "", + "gonna.jp", + "", + "", + "co.place", + "", + "", + "", + "", + "", + "ballangen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kutno.pl", + "", + "sa.com", + "augustow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "u.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.re", + "", + "", + "", + "etnedal.no", + "", + "", + "", + "", + "", + "akita.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gouv.ml", + "", + "n.bg", + "cb.it", + "", + "", + "", + "", + "", + "ustka.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bytom.pl", + "", + "", + "", + "", + "", + "sokndal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kvitsoy.no", + "", + "", + "", + "", + "", + "frana.no", + "", + "", + "", + "", + "si.it", + "", + "aurskog-h\303\270land.no", + "", + "", + "", + "", + "", + "", + "", + "", + "fly.dev", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.vn", + "", + "karlsoy.no", + "syno-ds.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sc.cn", + "", + "", + "", + "m\303\245lselv.no", + "", + "", + "", + "", + "", + "j.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "flakstad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "namsskogan.no", + "", + "", + "", + "", + "", + "", + "", + "", + "de.ls", + "edu.vn", + "", + "", + "", + "", + "", + "", + "belau.pw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ss.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "med.ee", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "1kapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.mr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.ca", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gob.pa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "md.ci", + "", + "kviteseid.no", + "", + "", + "", + "", + "blogspot.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "grondar.za", + "", + "sld.pa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "en-root.fr", + "", + "", + "", + "", + "", + "", + "ferrara.it", + "kautokeino.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "no-ip.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\303\245lesund.no", + "", + "", + "", + "msk.su", + "", + "", + "", + "", + "", + "", + 
"no-ip.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "boo.jp", + "", + "", + "", + "", + "", + "sch.jo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chimkent.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "naroy.no", + "", + "", + "", + "", + "sa.it", + "commune.am", + "", + "", + "", + "blogspot.no", + "", + "", + "", + "", + "", + "", + "", + "", + "arkhangelsk.su", + "blogdns.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.vc", + "", + "elverum.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "holt\303\245len.no", + "", + "", + "", + "", + "", + "sykkylven.no", + "", + "", + "", + "", + "boy.jp", + "", + "", + "", + "", + "", + "", + "", + "cloudfront.net", + "journalist.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "msk.ru", + "w.se", + "", + "", + "", + "", + "", + "", + "", + "sci.eg", + "", + "", + "", + "", + "", + "edu.vc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "but.jp", + "", + "", + "", + "", + "unj\303\241rga.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kharkov.ua", + "gob.do", + "", + "", + "", + "", + "", + "", + "", + "giske.no", + "", + "", + "", + "", + "", + "", + "", + "me.ss", + "", + "", + "", + "net.vn", + "yolasite.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "at-band-camp.net", + "", + "", + "", + "", + "b.bg", + "", + "", + "", + "", + "", + "sld.do", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.com", + "", + "", + "", + "cuneo.it", + "sos.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "akamaiedge.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "assur.bj", + "skierva.no", + "", + "", + "", + "", + "", + "", + "gloppen.no", + "", + "", + "", + "", + "sn\303\245ase.no", + "", + "", + "", + "", + "", "", "", "", @@ -18384,74 +16715,61 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\347\246\217\344\272\225.jp", "", "", "", "", - "\347\246\217\345\263\266.jp", "", "", "", "", - "\346\226\260\346\275\237.jp", "", "", "", "", - "\347\246\217\345\262\241.jp", "", "", "", "", - "\345\262\220\351\230\234.jp", "", "", "", "", - "\345\244\247\351\230\252.jp", "", "", "", "", - "\351\253\230\347\237\245.jp", "", "", "", "", - "edu.tj", "", "", + "br\303\270nn\303\270ysund.no", "", "", - "gov.kp", "", "", + "klepp.no", "", - "empresa.bo", - "\351\235\222\346\243\256.jp", "", "", "", "", - "gov.ng", "", - "ven.it", + "does-it.net", "", "", - "\350\214\250\345\237\216.jp", "", - "gildesk\303\245l.no", - "inf.br", "", "", "", "", "", - "arkhangelsk.su", "", "", "", + "mo\303\245reke.no", "", "", "", @@ -18459,12 +16777,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\345\237\274\347\216\211.jp", "", "", "", "", "", + "kapsi.fi", "", "", "", @@ -18478,8 +16796,8 @@ 
const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sport.hu", "", - "beauxarts.museum", "", "", "", @@ -18502,12 +16820,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "g.bg", "", "", - "\351\225\267\351\207\216.jp", "", "", "", + "aurskog-holand.no", "", "", "", @@ -18519,8 +16838,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kinghost.net", "", - "ac.ae", + "workers.dev", "", "", "", @@ -18530,7 +16850,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "i.se", "", "", "", @@ -18539,10 +16858,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "comunica\303\247\303\265es.museum", "", "", - "national.museum", "", "", "", @@ -18551,6 +16868,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "amusement.aero", "", "", "", @@ -18562,16 +16880,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "blogdns.net", "", "", - "assisi.museum", "", "", "", "", + "k.bg", "", "", "", + "alstahaug.no", "", "", "", @@ -18587,6 +16907,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "s\303\270r-fron.no", "", "", "", @@ -18595,14 +16916,49 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "froya.no", "", "", "", "", "", - "2ix.at", - "inder\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sel.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bielawa.pl", + "", + "", + "", + "", + "net.vc", + "", "", "", "de.us", @@ -18619,6 +16975,91 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "fermo.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ybo.trade", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sogne.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "codeberg.page", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -18647,27 +17088,31 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "civilisation.museum", "", - "nom.tj", + "caserta.it", "", "", "", "", - "\344\270\211\351\207\215.jp", "", "", "", + "gunma.jp", + "mo.us", "", "", "", + "gob.es", "", "", "", "", + "des.br", + "md.us", "", "", "", + "kochi.jp", "", "", "", @@ -18677,11 +17122,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cistron.nl", "", "", "", - "serveirc.com", "", + "ardal.no", "", "", "", @@ -18701,11 +17147,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "girly.jp", "", + "ddnss.de", + "gov.fj", "", - "sr.it", + "gob.cl", "", "", + "sunndal.no", "", "", "", @@ -18725,6 +17175,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "krym.ua", "", "", "", @@ -18734,9 +17185,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "c.la", + 
"caltanissetta.it", "", "", + "barletta-trani-andria.it", "", "", "", @@ -18746,12 +17198,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "edu.za", "", "", "", "", "", - "enterprisecloud.nu", "", "", "", @@ -18771,8 +17223,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "me.us", "", - "3utilities.com", "", "", "", @@ -18785,45 +17237,44 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "soundcast.me", "", + "mus.br", + "nerdpol.ovh", "", "", "", + "nalchik.ru", "", - "ybo.review", "", "", - "baseball.museum", "", "", "", "", "", "", + "konskowola.pl", "", "", "", "", "", - "avoues.fr", "", "", - "bj.cn", - "rec.ve", + "hi.us", "", "", "", - "ravendb.community", "", "", + "mt.us", "", "", "", - "biz.nr", - "asmatart.museum", - "rotorcraft.aero", "", "", + "eurodir.ru", "", "", "", @@ -18837,34 +17288,33 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "no-ip.org", "", "", "", - "umbria.it", "", "", + "karm\303\270y.no", "", "", "", "", "", "", - "from-hi.com", "", "", "", "", "", + "slz.br", "", - "os\303\270yro.no", "", - "sc.cn", "", - "bremanger.no", "", "", "", "", + "gv.ao", "", "", "", @@ -18874,6 +17324,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kr\303\245anghke.no", + "kalmykia.su", "", "", "", @@ -18884,21 +17336,130 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "net.nf", "", "", + "nom.za", "", "", "", "", "", "", + "brindisi.it", "", "", + "notaires.km", "", "", "", - "sogndal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "carrd.co", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sondrio.it", + "", + "", + "gouv.ht", + "", + "", + "", + "", + "", + "", + "", + "sauda.no", + "", + "", + "", + "", + "", + "", + "awsglobalaccelerator.com", + "", + "goupile.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mar.it", + "", + "", + "biz.gl", + "", + "", + "", + "", + "", + "", + "", + "", + "vet.br", + "", + "", + "g12.br", + "nordreisa.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "skedsmokorset.no", + "", + "", + "", + "", + "", + "gran.no", + "", "", "", "", @@ -18914,18 +17475,1568 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "caserta.it", + "akamaihd-staging.net", "", "", "", "", "", "", - "glitch.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudns.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "folldal.no", + "air-surveillance.aero", + "", + "", + "", + "", + "edu.dz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bajddar.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mol.it", + "", + "", + "", + "", + "", + "naklo.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "net.za", + "\304\215\303\241hcesuolo.no", + "", + "", + "2ix.at", + "", + "frosinone.it", + "", + "", + "", + "", + "", + "waw.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.tz", + "", + "", + "", + "", + "co.nz", + "", + "", + "", + "", + "", + "", + "x0.to", + "", + "", + "co.cz", + "", + "", + "birkenes.no", + "", + "", + "", + "mat.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sakuratan.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servesarcasm.com", + "", + "", + "", + "", + "", + "", + "", + "", + "fla.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "x0.com", + "blogspot.sk", + "", + "", + "hoylandet.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.ie", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "e4.cz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "crotone.it", + "", + "", + "kyiv.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "web.bo", + "", + "", + "kiev.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hm.no", + "", + "", + "", + "bmoattachments.org", + "", + "sch.ss", + "co.sz", + "", + "", + "sogndal.no", + "", + "", + "", + "", + "blogspot.it", + "", + "", + "", + "", + "", + "eero-stage.online", + "", + "", + "", + "", + "hu.com", + "", + "", + "", + "blogspot.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "arts.ve", + "skierv\303\241.no", + "web.id", + "", + "", + "", + "", + "", + "", + "", + "gaivuotna.no", + "", + "blogspot.am", + "", + "ven.it", + "", + "", + "", + "", + "gov.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hk.com", + "", + "", + "", + "", + "", + "", + "", + "blackbaudcdn.net", + "", + "", + "", + "", + "web.co", + "", + "dy.fi", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "baria-vungtau.vn", + "", + "kirkenes.no", + "", + "", + "", + "dr\303\270bak.no", + "kafjord.no", + "", + "", + "net.dz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\347\275\221\347\273\234.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ne.tz", + "spdns.eu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudns.eu", + "", + "", + "", + "", + "", + "bip.sh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\347\266\262\347\265\241.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudns.pw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.gr", + "", + "", + "", + "", + "\345\205\254\345\217\270.cn", + "coop.tt", + "", + "faststacks.net", + "", + "art.dz", + "", + "", + "", + "kicks-ass.org", + "", + "", + "", + "akamaiorigin.net", + "", + "", + "selbu.no", + "", + "", + "", + "", + "", + "", + "h\303\245.no", + "", + "", + "", + "hs.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "dr.na", + "", + "", + "", + "blogspot.ae", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "government.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "zarow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "com.nf", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bitbridge.net", + "", + "", + "net.nz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bahccavuotna.no", + "", + "dc.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bjerkreim.no", + "com.vi", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyn53.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sor-odal.no", + "aichi.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.cv", + "", + "", + "", + "", + "", + "", + "gov.vc", + "mn.us", + "", + "", + "", + "", + "", + "", + "", + "ybo.review", + "", + "", + "", + "", + "", + "", + "", + "", + "edgeapp.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "curv.dev", + "", + "", + "", + "", + "k12.tr", + "", + "", + "", + "", + "brumunddal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.pub", + "sch.sa", + "", + "", + "", + "", + "", + "", + "", + "jelastic.team", + "", + "", + "", + "", + "", + "barsy.support", + "", + "", + "", + "", + "", + "sampa.br", + "", + "", + "", + "", + "campinagrande.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kids.us", + "", + "", + "web.pk", + "", + "", + "", + "gob.hn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fjell.no", + "unjarga.no", + "", + "", + "union.aero", + "econo.bj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.vn", + "", + "", + "cloudns.pro", + "", + "", + "", + "", + "", + "", + "sandnes.no", + "", + "", + "", + "", + "bambina.jp", + "", + "", + "", + "", + "", + "", + "aland.fi", + "", + "", + "", + "", + "", + "stavern.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.lu", + "", + "co.uz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "shopselect.net", + "", + "", + "", + "cx.ua", + "", + "", + "", + "stordal.no", + "", + "", + "hemsedal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "airline.aero", + "", + "", + "", + "", + "", + "\303\241laheadju.no", + "", + "fj.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "flora.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "selje.no", + "now-dns.org", + "", + "", + "", + "", + "", + "", + "", + "aivencloud.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "serveexchange.com", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "coop.mv", + "", + "", + "", + "", + "snasa.no", + "", + "", + "", + "fhs.no", + "", + "", + "", + "", + "", + "", + "", + "withgoogle.com", + "", + "", + "sandcats.io", + "", + "", + "", + "net.nf", + "", + "", + "", + "", + "enscaled.sg", + "", + "go.tz", + "", + "", + "", + "", + "", + "", + "med.ec", + "", + "", + "", + "", + "", + "ybo.party", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.vi", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kopervik.no", + "", + "", + "", + "", + "", + "", + "drr.ac", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "angry.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sortland.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wedeploy.me", + "mk.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kharkiv.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edgesuite.net", + "", + "", + "", + "", + "smushcdn.com", + "", + "", + "blogspot.lt", + "", + "co.mz", + "", + "", + "", + "", + "", + "", + "angiang.vn", + "", + "", + "chernovtsy.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "grajewo.pl", + "", + "", + "", + "", + "", + "", + "", + "aero.tt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sorreisa.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "balsfjord.no", + "", + "", + "", + "barsy.uk", + "", + "csx.cc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "conf.lv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gov.za", + "", + "", + "", + "", + "hs.run", + "", + "", + "services.aero", + "", + "", + "", + "blogspot.ug", + "", + "", + "software.aero", + "", + "", + "", + "", + "", + "a\303\251roport.ci", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ac.tz", + "", + "", + "", + "", + "ac.nz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "boavista.br", + "jeju.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fukui.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gouv.km", + "", + "", + "", + "", + "", + "", + "", + "", + "kvam.no", + "hzc.io", + "", + "", + "", + "", + "", "", "", "", @@ -18948,7 +19059,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sc.ke", "", "", "", @@ -18958,50 +19068,44 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "folldal.no", - "edu.dz", + "web.lk", "", "", "", - "ascolipiceno.it", + "\351\271\277\345\205\220\345\263\266.jp", "", "", "", - "novara.it", "", "", "", "", - "org.mu", "", "", "", "", - "org.so", "", "", "", "", - "org.mo", + "hurum.no", "", 
"", "", "", + "nfshost.com", "", "", "", "", "", - "org.bo", "", "", "", "", - "org.sd", "", "", "", - "in.ua", "", "", "", @@ -19014,25 +19118,27 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ok.us", "", "", "", - "agdenes.no", "", "", + "servegame.com", + "up.in", "", - "org.bm", + "siiites.com", + "ac.sz", "", "", "", "", - "org.km", - "sn\303\245ase.no", + "stj\303\270rdal.no", "", "", + "sh.cn", "", "", + "net-freaks.com", "", "", "", @@ -19042,6 +19148,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ngo.za", + "com.sb", "", "", "", @@ -19050,465 +19158,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "berg.no", "", "", - "org.mw", "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.bw", - "", - "", - "", - "americanart.museum", - "org.kw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "syno-ds.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sch.ae", - "", - "", - "cloudcontrolapp.com", - "", - "", - "fauske.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "si.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "guernsey.museum", - "", - "", - "", - "", - "", - "", - "br\303\270nn\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "barsycenter.com", - "", - "", - "", - "steigen.no", - "", - "or.bi", - "", - "", - "", - "", - "net.tj", - "", - "", - "", - "", - "", - "", - "", - "org.sy", - "", - "", - "", - "", - "org.my", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.ky", - "\347\256\207\344\272\272.hk", - "", - "", - "", - "org.cu", - "", - "", - "", - "", - "", - "", - "", - "s\303\270rum.no", - "", - "org.co", - "", - "", - "", - "", - "org.uy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "santacruz.museum", - "", - "", - "", - "", - "", - "", - "gov.vn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bbs.tr", - "org.cw", - "", - "", - "", - "or.us", - "org.ru", - "", - "", - "championship.aero", - "", - "", - "", - "", - "", - "", - "org.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ddr.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gjovik.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "finland.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "akita.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.im", - "", - "org.rw", - "ac.in", - "", - "", - "", - "", - "", - "servesarcasm.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nalchik.ru", - "", - "", - "", - "org.cy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gorge.museum", - "", - "", - "bz.it", - "gov.az", - "scapp.io", - "", - "org.se", - "", - "", - "", - "", - "org.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "in.london", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sondrio.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "surrey.museum", - "", - "", - "", - "", - "", - "force.museum", "vao.it", - "donna.no", "", + "com.bb", "", "", "", @@ -19518,7 +19174,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "vda.it", "", - "antiques.museum", "", "", "", @@ -19526,14 +19181,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "de.gt", "", "", "", + "froya.no", "", "", "", "", - "dy.fi", "", "", "", @@ -19541,29 +19197,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "spdns.de", "", "", "", "", "", - "isa.us", "", + "gouv.fr", "", "", "", "", + "no-ip.biz", "", "", "", "", "", + "edu.sb", "", - "sos.pl", "", "", "", - "workshop.museum", "", "", "", @@ -19572,11 +19227,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hornindal.no", "", + "edu.bb", "", "", "", - "australia.museum", "", "", "", @@ -19587,22 +19243,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.st", - "children.museum", - "sjc.br", - "commune.am", + "me.ke", "", - "org.mt", "", "", "", + "hotel.hu", "", "", "", "", "", - "com.vc", - "org.bt", + "med.sa", + "gov.dz", "", "", "", @@ -19610,12 +19263,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sf.no", "", "", "", - "warmia.pl", "", - "groundhandling.aero", "", "", "", @@ -19623,23 +19275,108 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "salzburg.museum", "", "", "", "", - "sykkylven.no", "", "", "", "", "", "", + "hk.cn", + "naamesjevuemie.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mi.us", + "", + "", + "", + "", + "", + "", + "notodden.no", + "", + "", + "", + "aosta-valley.it", + "", + "hotel.lk", + "", + "", + "", + "", + "", + "contagem.br", + "", + "", + "", + "", + "", + "", + "gyeongbuk.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sk\303\245nland.no", + "", + "he.cn", + "from-wy.com", + "", + "", + "", + "", + "", + "", + "", + "", + "bronnoy.no", + "", + "", + "", + "", + "gangwon.kr", + "", + "", + "", + "", "", "", "", - "royrvik.no", "biz.ni", + "from-ky.com", "", "", "", @@ -19648,19 +19385,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "finearts.museum", "", "", - "edu.vc", "", "", "", - "bearalvahki.no", "", "", "", "", - "grajewo.pl", + "mil.bo", "", "", "", @@ -19668,841 +19402,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.mk", "", "", + "bologna.it", "", "", "", + "ms.us", "", "", "", "", - "", - "flakstad.no", - "watchandclock.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.dz", - "", - "", - "", - "org.uk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "chocolate.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "air-traffic-control.aero", - "", - "sport.hu", - "environment.museum", - "", - "", - "", - "", - "dr.na", - "", - "", - "", - "", - "", - "", - "", - "union.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "art.dz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.py", - "", - "", - "", - "", - "", - "spy.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ddnss.de", - "jewish.museum", - "", - "", - "arteducation.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "iz.hr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.pl", - "", - "il.us", - "", - "", - "", - "co.education", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aviation.museum", - "", - "", - "soundandvision.museum", - "", - "", - "", - "", - "", - "", - "", - "nom.vc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "santoandre.br", - "", - "", - "", - "freiburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kr.com", - "", - "", - "", - "", - "", - "dyn53.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "drammen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fc.it", - "", - "", - "sandiego.museum", - "", - "", - "", - "", - "zgora.pl", - "org.gu", - "elverum.no", - "", - "", - "", - "", - "", - "ybo.trade", - "", - "", - "securitytactics.com", - "storj.farm", - "", - "freeboxos.fr", - "", - "", - "skole.museum", - "", - "", - "", - "", - "", - "col.ng", - "", - "", - "", - "", - "", - "", - "", - "", - "barueri.br", - "finnoy.no", - "skedsmokorset.no", - "", - "", - "", - "", - "", - "", - "org.pe", - "", - "", - "", - "or.na", - "", - "", - "", - "", - "", - "", - "", - "filegear.me", - "s\303\241lat.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "filegear-de.me", - "", - "", - "", - "equipment.aero", - "", - "", - "", - "ivanovo.su", - "", - "", - "", - "", - "", - "ravendb.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "id.ir", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sel.no", - "", - "", - "", - "", - "", - "", - "", - "org.ly", - "", - "", - "", - "", - "", - "bajddar.no", - "", - "", - "", - "", - "", - "", - "", - "bearalv\303\241hki.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gulen.no", - "", - "", - "ss.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sv.it", - "", - "", - "", - "", - "", - "", - "fr\303\270ya.no", - "", - "", - "", - "", - "", - "va.no", - "", - "", - "", - "", - "", - "org.gy", - "cloudns.eu", - "", - "", - "", - "", - "", - "", - "", - "", - "org.pt", - "", - "americana.museum", - "", - "", - "org.br", - "rindal.no", - "giehtavuoatna.no", - "", - "", - "", - "", - "", - "iki.fi", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "unusualperson.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "farmers.museum", - "", - "", - "", - "", - "", - "royken.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "biz.gl", - "", - "", - "", - "", - "coastaldefence.museum", - "sn\303\245sa.no", - "", - "", - "", - "", - "", - "", - "", - "", - "br\303\270nn\303\270ysund.no", - "", - "", - "", - "gjemnes.no", - "", - "", - "", - "", - "org.pk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sm\303\270la.no", - "", - "", - "", - "", - "", - "", - "", - "", - "slz.br", - "nohost.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "skanit.no", - "", - "", - "", - "", - "jgora.pl", - "", - "", - "", - "wales.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudns.cc", - "", - "", - "", - "", - "sa.it", - "", - "", - "", - "", - "", - "co.technology", - "", - "", - "", - "", - "", - "", - "", - "fineart.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "wnext.app", - "", - "", - "", - "", - "", - "", - "", - "aichi.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dni.us", - "stor-elvdal.no", - "cdn77-ssl.net", - "", - "", - "on.ca", - "", - "campinas.br", - "", - "", - "", - "", - "", - "", - "", - "net.vc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "chernihiv.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aejrie.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.ge", - "", - "", - "", - "campobasso.it", - "", - "", - "sorreisa.no", - "", - "", - "naturhistorisches.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nationalheritage.museum", - "", - "yolasite.com", - "", - "", - "azurecontainer.io", - "namsskogan.no", - "", - "", - "", - "", - "", - "carbonia-iglesias.it", - "", - "sch.sa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "whaling.museum", - "", - "", - "", - "", - "", - "", - "ddnslive.com", - "", + "aero.mv", "", "", "", "", + "midsund.no", "", "", "", @@ -20513,6 +19429,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "freesite.host", "", + "mil.km", "", "", "", @@ -20531,198 +19448,27 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "sardinia.it", - "", - "", - "org.ir", - "", - "", - "", - "", - "", - "", - "ong.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "namdalseid.no", - "", - "", - "", - "", - "zapto.xyz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ng.ink", - "", - "", - "", - "", - "", - "", - "org.gt", - "", - "", - "", - "", - "burghof.museum", - "org.lk", - "", - "", - "", - "", - "workinggroup.aero", - "", - "", - "", - "", - "", - "sch.jo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.om", - "", - "", - "certmgr.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stadt.museum", - "", - "", - "", - "", - "", - "", - "", - "bergamo.it", - "caltanissetta.it", - "", - "", - "", - 
"", - "", - "serveexchange.com", - "or.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bolzano.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "broker.aero", - "", - "", + "32-b.it", "", "", "", "", "", + "forl\303\254cesena.it", + "woltlab-demo.com", "", "", "", + "nesoddtangen.no", "", "", + "mil.id", + "campania.it", "", "", "", + "kyoto.jp", + "16-b.it", "", "", "", @@ -20739,6 +19485,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "2-d.jp", + "", + "", + "", + "", + "", + "", + "", + "64-b.it", "", "", "", @@ -20750,18 +19505,70 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "basel.museum", "", "", "", - "blogdns.com", + "", + "mil.co", + "", + "", + "", + "hoabinh.vn", "", "", "", "", "", "", - "de.cool", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "web.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.ru", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jeonnam.kr", + "", + "", + "", + "", + "", + "", + "", + "sa.au", + "", + "", + "", + "", + "sells-it.net", + "gouv.ci", "", "", "", @@ -20781,7 +19588,565 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gob.es", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "krasnodar.su", + "mil.sy", + "", + "dn.ua", + "", + "", + "mil.my", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.by", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.rw", + "", + "", + "", + "", + "", + "blogspot.hu", + "", + "", + "", + "net.sb", + "", + "", + "", + "", + "mil.uy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.bb", + "", + "", + "enterprisecloud.nu", + "", + "", + "", + "", + "", + "", + "", + "", + "mytis.ru", + "", + "ma.us", + "", + "", + "", + "", + "", + "", + "gorlice.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.st", + "", + "", + "", + "", + "med.pa", + "", + "", + "", + "", + "mil.cy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "de.com", + "", + "kontum.vn", + "", + "", + "", + "blogspot.hk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aure.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hemne.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "floripa.br", + "", + "cloudns.in", + "blogspot.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "securitytactics.com", + "bentre.vn", + "", + "", + "", + "", + "", + "", + "under.jp", + "", + "", + "", + "", + "sytes.net", + "", + "kherson.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "med.ht", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "north-kazakhstan.su", + "", + "", + "", + "", + "", + "", + "", + "jan-mayen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.sn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gen.nz", + "", + "", + "", + "", + "", + "freeboxos.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chu.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "web.gu", + "", + "", + "sp.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "med.om", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "xii.jp", + "", + "", + "", + "", + "123miweb.es", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "h\303\241pmir.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ddnslive.com", + "", + "", + "", + "", + "", + "mil.py", + "carrara-massa.it", + "", + "", + "", + "", + "", + "", + "", + "workisboring.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "groundhandling.aero", + "", + "", + "mil.pe", + "", + "", + "", + "", + "", + "", + "", + "", + "salud.bo", + "", + "", + "", + "", + "ms.kr", + "", + "", + "", + "ac.mz", + "", + "", + "", + "", + "noticias.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "me.so", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -20794,19 +20159,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "granvin.no", + "barsycenter.com", + "aktyubinsk.su", "", "", - "org.pr", "", "", "", "", "", + "from.tv", "", + "web.in", "", - "ol.no", "", + "game-server.cc", "", "", "", @@ -20822,9 +20189,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "chernihiv.ua", + "slattum.no", "", "", "", + "mil.to", "", "", "", @@ -20834,14 +20204,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sardinia.it", "", "", - "czest.pl", "", + "nome.cv", "", "", "", - "iveland.no", "", "", "", @@ -20854,8 +20224,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mil.tm", + "hjelmeland.no", + "navuotna.no", "", - "stange.no", "", "", "", @@ -20863,16 +20235,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "filegear.me", "", "", "", "", - "org.sn", "", "", "", - "def.br", - "org.mn", "", "", "", @@ -20880,21 +20250,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kozow.com", + "wa.au", "", - "gunma.jp", - "org.bn", "", "", + "gsj.bz", "", "", - "org.kn", "", "", - "collection.museum", "", "", "", "", + "mil.tw", "", "", "", @@ -20911,6 +20281,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "h.se", "", "", "", @@ -20924,46 +20295,50 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "uklugs.org", "", "", "", "", "", + "storj.farm", "", "", "", - "belau.pw", "", "", + "mr.no", "", "", "", "", + "unusualperson.com", "", "", + "com.lb", "", "", "", "", - "2ix.de", "", "", "", "", + "snillfjord.no", + "mil.br", "", "", "", "", + "mil.kr", + "cri.nz", "", 
"", - "ing.pa", "", "", - "friulivegiulia.it", "", "", "", + "bodo.no", "", "", "", @@ -20973,13 +20348,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "remotewd.com", "", "", - "spdns.eu", "", "", "", + "como.it", "", "", "", @@ -20988,28 +20362,32 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sandnes.no", "", + "mo.it", "", + "ha.no", "", "", "", "", + "blogspot.rs", "", + "harstad.no", "", "", "", "", "", "", + "edu.lb", "", "", - "wegrow.pl", "", "", "", "", "", + "hn.cn", "", "", "", @@ -21020,14 +20398,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dc.us", "", - "org.bi", "", "", "", "", - "org.ki", "", "", "", @@ -21035,42 +20410,40 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "americanantiques.museum", "", "", "", "", - "bindal.no", "", "", "", "", "", - "chernovtsy.ua", "", "", + "binhduong.vn", "", "", "", - "org.to", "", "", "", "", - "filegear-ie.me", "", "", "", "", + "alt.za", "", - "r\303\241isa.no", "", "", "", + "edgekey.net", "", "", "", "", + "charter.aero", "", "", "", @@ -21082,17 +20455,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.tm", "", "", "", "", "", + "biz.tj", "", - "blogspot.md", "", "", - "org.cn", "", "", "", @@ -21105,24 +20476,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aseral.no", "", "", "", "", "", "", + "game-host.org", "", "", "", "", "", + "me.it", "", "", "", "", "", - "org.tw", "", "", "", @@ -21137,9 +20508,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.lr", "", - "notodden.no", "", "", "", @@ -21153,21 +20522,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fg.it", - "unj\303\241rga.no", "", "", "", "", + "krodsherad.no", "", "", "", "", + "kilatiron.com", "", - "annefrank.museum", + "mt.it", "", "", - "nordreisa.no", "", "", "", @@ -21181,20 +20549,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ustka.pl", "", - "foggia.it", "", "", "", - "gotpantheon.com", + "airkitapps.eu", "", - "barsy.uk", "", "", "", "", "", + "now-dns.top", "", "", "", @@ -21204,83 +20570,27 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "blogspot.hr", "", "", "", - "org.gr", + "acct.pro", "", "", - "batsfjord.no", "", "", "", "", - "org.in", "", "", - "cloudns.us", "", "", - "virtuel.museum", "", "", "", "", "", "", - "fitjar.no", - "", - "", - "", - "org.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rimini.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.my", - "", - "", - "", - "", - "", - "beardu.no", - "", - "", - "", - "", - "", - "", - "", - "med.sd", - "cambridge.museum", - "", - "", - "", - "", - "farmstead.museum", - "", "", "", "", @@ -21290,6 +20600,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nsupdate.info", "", "eidsvoll.no", "", @@ -21300,6 +20611,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "familyds.com", "", "", "", @@ -21307,1209 +20619,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mod.gi", "", "", - "sveio.no", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "gorlice.pl", - "", - "", - "", - "", - "williamsburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "airline.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "of.no", - "", - "", - "", - "", - "", - "", - "", - "", - "a.bg", - "", - "instantcloud.cn", - "", - "", - "q.bg", - "", - "", - "", - "", - "x.bg", - "", - "", - "", - "", - "fla.no", - "", - "", - "", - "", - "3.bg", - "", - "", - "", - "", - "blogspot.td", - "", - "", - "", - "", - "w.bg", - "", - "", - "", - "", - "gov.tj", - "", - "", - "", - "", - "c.bg", - "", - "", - "", - "", - "y.bg", - "", - "", - "", - "airtraffic.aero", - "4.bg", - "", - "", - "", - "", - "1.bg", - "", - "", - "", - "", - "9.bg", - "", - "", - "", - "", - "2.bg", - "", - "", - "", - "", - "z.bg", - "", - "", - "", - "", - "blogspot.tw", - "", - "", - "", - "", - "6.bg", - "", - "", - "vi.us", - "", - "dynns.com", - "", - "", - "", - "", - "0.bg", - "", - "", - "", - "", - "realestate.pl", - "", - "", - "", - "", - "e.bg", - "", - "", - "", - "juedisches.museum", - "8.bg", - "", - "", - "muos\303\241t.no", - "", - "7.bg", - "", - "", - "cloudcontrolled.com", - "", - "5.bg", - "", - "", - "", - "flanders.museum", - "", - "", - "", - "org.pn", - "", - "", - "field.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bytom.pl", - "", - "", - "", - "", - "", - "", - "orsta.no", - "", - "", - "", - "", - "", - "blogspot.de", - "", - "od.ua", - "", - "", - "", - "", - "", - "", - "", - "u.bg", - "", - "", - "", - "", - "", - "gotdns.ch", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "frosta.no", - "", - "", - "", - "blogdns.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "im.it", - "", - "", - "", - "i.ng", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "istmein.de", - "", - "", - "skaun.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bahccavuotna.no", - "", - "", - "", - "notaires.km", - "", - "", - "", - "", - "", - "", - "", - "", - "virtualuser.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.au", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "wroclaw.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.am", - "", - "blogspot.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.ro", - "sa.au", - "", - "org.tt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "n.bg", - "", - "", - "", - "", - "", - "", - "", - "for-better.biz", - "", - "d.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "childrens.museum", - "", - "", - "inf.ua", - "", - "nym.nz", - "", - "", - "", - "ferrara.it", - "", - "", - "", - "", - "", - "", - "", - "web.ni", - "", - "", - "", - "sherbrooke.museum", - "", - "", - "", - "j.bg", - "blogspot.dk", - "", - "", - "", - "", - "blogspot.mk", - "", - "", - "", - "services.aero", - "", - "k12.tr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kvits\303\270y.no", - "co.business", - "bjerkreim.no", - "", - "", - "", - "", - "", - "chiropractic.museum", - "", - 
"", - "", - "", - "skiptvet.no", - "", - "", - "", - "alstahaug.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nesoddtangen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ky.us", - "", - "", - "", - "", - "", - "rollag.no", - "", - "", - "gotdns.org", - "", - "", - "", - "", - "", - "", - "geometre-expert.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "estate.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.gn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ballangen.no", - "", - "", - "", - "", - "", - "", - "bashkiria.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sh.cn", - "", - "", - "", - "net-freaks.com", - "", - "birkenes.no", - "", - "", - "", - "saltdal.no", - "gov.dz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "idv.hk", - "ybo.party", - "", - "", - "", - "", - "", - "", - "gob.ec", - "", - "", - "", - "", - "", - "vikna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "o.se", - "", - "", - "", - "", - "", - "salud.bo", - "", - "", - "amot.no", - "siteleaf.net", - "", - "", - "", - "stockholm.museum", - "", - "", - "", - "", - "", - "birthplace.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.no", - "", - "", - "int.ni", - "", - "", - "", - "", - "bitballoon.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "napoli.it", - "zarow.pl", - "", - "", - "", - "", - "", - "org.gi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "filegear-au.me", - "", - "", - "", - "or.ug", - "org.ae", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "prd.km", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.re", - "", - "", - "", - "", - "", - "", - "", - "", - "router.management", - "", - "", - "", - "", - "", - "boavista.br", - "", - "", - "", - "", - "", - "", - "", - "", - "dnsup.net", - "", - "", - "", - "", - "", - "", - "", - "", - "rec.nf", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.nz", - "", - "", - "", - "berkeley.museum", - "", - "", - "sandefjord.no", - "webhosting.be", - "vadso.no", - "", - "dn.ua", - "", - "", - "", - "", - "", - "", - "", - "channelsdvr.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "recreation.aero", - "sorfold.no", - "", - "", - "state.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "run.app", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sandoy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stokke.no", - "nom.vg", - "", - "", - "", - "", - "", - "", - "", - "square7.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "riodejaneiro.museum", - "", - "", - "", - "", - "", - "", - "org.tr", - "", - "", - "", - "", - "", - "", - "fhs.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.mr", - "", - "", - "", - "", - "nic.tj", - 
"", - "", - "", - "\347\265\204\347\271\224.\351\246\231\346\270\257", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gob.cl", - "", - "", - "", - "", - "", - "", - "\346\225\231\350\202\262.\351\246\231\346\270\257", - "", - "cb.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ownprovider.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "g.bg", - "", - "", - "", - "agrinet.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ciscofreak.com", - "", - "sk\303\245nland.no", - "", - "", - "", - "", - "bss.design", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "med.ly", - "", - "md.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "zapto.org", - "", - "", - "", - "", - "", - "", - "", - "", - "square7.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pro.cy", - "", - "", - "", - "", - "", - "", - "", - "gotdns.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "association.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "frogn.no", - "", - "", - "skodje.no", - "", - "", - "ybo.science", - "", - "med.br", - "", - "friuliveneziagiulia.it", - "friulivenezia-giulia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gob.ni", - "", - "", - "radom.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.ie", "", "", "", @@ -22540,6 +20652,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mer\303\245ker.no", + "", + "", + "", + "", + "", + "", + "mil.ye", + "giehtavuoatna.no", "", "", "", @@ -22547,7 +20668,46 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "from-az.net", + "", + "", + "", + "", + "", + "biz.zm", + "", + "", + "", + "", + "", + "modum.no", + "", + "", + "", + "sorfold.no", + "", + "hammerfest.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "coop.ar", + "", + "", + "", + "wedeploy.io", + "", + "", + "", + "", + "", + "familyds.net", + "def.br", "", "", "", @@ -22569,16 +20729,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "realm.cz", - "", - "", - "", - "", - "", - "", - "", - "wafflecell.com", - "detroit.museum", "", "", "", @@ -22592,10 +20742,351 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "r.bg", + "", + "", + "", + "", + "", + "", + "", + "spdns.org", + "", + "", + "", + "etne.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "3utilities.com", + "", + "", + "", + "", + "", + "", + "biz.wf", + "bryansk.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "assn.lk", + "", + "grue.no", + "net.lb", + "", + "", + "", + "qoto.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "saotome.st", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "name.et", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"karasjok.no", + "", + "", + "", + "", + "", + "aerobatic.aero", + "", + "", + "", + "", + "nic.za", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "name.tt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "from-ny.net", + "student.aero", + "", + "", + "", + "", + "", + "", + "mcdir.ru", + "skanland.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "de.md", + "", + "", + "media.hu", + "", + "", + "", + "", + "gov.sb", + "nore-og-uvdal.no", + "", + "", + "sandnessjoen.no", + "", + "", + "", + "", + "", + "", + "", + "cherkassy.ua", + "", + "", + "gov.bb", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "busan.kr", + "", + "", + "", + "", + "med.pl", + "", + "simplesite.gr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.ae", + "", + "", + "", + "", + "", + "hl.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.mv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "baclieu.vn", + "", + "", + "", + "", + "", + "", + "\303\270stre-toten.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "finn\303\270y.no", + "", + "", + "", + "monza.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "alto-adige.it", + "", + "", + "", + "", + "", + "", + "k12.ec", + "", + "", + "", + "", "vic.au", "", - "sicily.it", + "scrysec.com", "", "", "", @@ -22603,11 +21094,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.iq", "", - "nebraska.museum", - "", - "blogspot.kr", + "co.business", "", "", "", @@ -22621,8 +21109,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.vc", - "ravendb.run", "", "", "", @@ -22635,7 +21121,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "andriatranibarletta.it", + "", + "", + "", + "mil.tr", "", "", "", @@ -22645,7 +21134,729 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "utsira.no", + "", + "", + "", + "", + "name.tr", + "", + "", + "", + "my-vigor.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gob.ni", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wpmucdn.com", + "", + "", + "", + "", + "dynv6.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "uni5.net", + "", + "", + "no-ip.ca", + "naturbruksgymn.se", + "", + "", + "", + "simplesite.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wlocl.pl", + "", + "", + "", + "", + "", + "", + "", + "blogspot.sg", + "nx.cn", + "", + "", + "", + "", + "", + "", + "hol.no", + "", + "", + "", + "", + "", + "", + "", + "", + "buyshop.jp", + "", + "", + "", + "", + "", + "mo.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "academia.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"123siteweb.fr", + "", + "", + "", + "shacknet.nu", + "murmansk.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jpn.org", + "", + "", + "endoftheinternet.org", + "", + "", + "", + "", + "", + "", + "bss.design", + "", + "", + "", + "", + "", + "", + "jx.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fem.jp", + "", + "", + "", + "", + "", + "", + "", + "servegame.org", + "", + "", + "", + "", + "", + "", + "kuleuven.cloud", + "", + "", + "steigen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "zhitomir.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "hi.cn", + "", + "", + "", + "", + "", + "", + "service.one", + "", + "", + "", + "name.mv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sandefjord.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "f.bg", + "", + "", + "", + "", + "dyn-ip24.de", + "", + "", + "", + "bar2.net", + "", + "", + "", + "", + "bar1.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "friulivenezia-giulia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vi.us", + "", + "", + "", + "", + "", + "", + "", + "quangngai.vn", + "", + "", + "", + "", + "", + "", + "mn.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "familyds.org", + "gitpage.si", + "", + "", + "biz.az", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "alessandria.it", + "za.bz", + "ninhbinh.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "journal.aero", + "", + "nesodden.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "conf.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "samnanger.no", + "", + "", + "", + "", + "", + "", + "skjervoy.no", + "", + "", + "", + "", + "", + "catering.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bialystok.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.pe", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.ar", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sb.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chiba.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mc.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "alaheadju.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "andasuolo.no", + "", + "blogspot.pt", + "name.mk", + "", + "", + "airkitapps.com", + "saltdal.no", + "", + "", + "", + "", + "", + "", + "dni.us", + "", + "", + "", + "", + "", + "", + "", + "blogspot.ca", + "stord.no", + "mil.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "community-pro.de", + "servehttp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ha.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stavanger.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "nis.za", + "mil.ge", + "", + "", + "", + "", + "v\303\245gs\303\270y.no", + "", + "zapto.org", + "", + "", + "", + "", + "", + "", + "arts.nf", + "", + "", + "synology.me", "", "", "", @@ -22662,8 +21873,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "googleapis.com", "", - "vardo.no", "", "", "", @@ -22671,15 +21882,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "friulive-giulia.it", + "bplaced.com", "", "", "", "", + "sellsyourhome.org", "", "", "", - "blogspot.it", "", "", "", @@ -22710,7 +21921,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "shell.museum", "", "", "", @@ -22719,31 +21929,32 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mil.gt", "", "", "", "", - "epilepsy.museum", "", - "servehttp.com", "", "", "", "", "", "", + "hk.org", "", "", "", "", "", + "graphox.us", "", "", "", "", "", + "for-better.biz", "", - "coop.tt", "", "", "", @@ -22761,32 +21972,40 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "blush.jp", "", "", "", - "sci.eg", "", "", "", + "sandnessj\303\270en.no", "", "", + "storfjord.no", "", "", "", "", "", + "santamaria.br", "", "", "", "", - "barsy.pub", "", "", "", + "\303\270ystre-slidre.no", "", + "amli.no", + "hostedpi.com", + "sor-aurdal.no", + "community-pro.net", "", "", "", + "blogspot.si", "", "", "", @@ -22810,13 +22029,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sund.no", "", - "gob.hn", + "gx.cn", "", "", "", "", "", + "kepno.pl", "", "", "", @@ -22827,7 +22048,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.ar", "", "", "", @@ -22840,12 +22060,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "d.se", "", "", + "asti.it", "", "", - "arts.ro", "", + "ehime.jp", + "vn.ua", "", "", "", @@ -22855,12 +22078,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mp.br", "", "", "", "", "", "", + "from-me.org", "", "", "", @@ -22870,27 +22095,709 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dynns.com", "", + "2ix.de", "", + "gov.lb", "", "", + "donna.no", "", "", "", "", - "id.au", "", "", "", + "blogspot.is", "", + "cupcake.is", "", "", "", "", + "encoreapi.com", + "", + "", + "", + "", + "", + "", + "", + "blogspot.fr", + "", + "", + "slask.pl", + "", + "", + "", + "", + "", + "", + "jobs.tt", + "", + "myvnc.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blog.gt", + "", + "", + "gouv.sn", + "", + "", + "mil.lv", + "ddns5.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servepics.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ddnss.org", + "", + "curitiba.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bozen.it", + "", + "", + "", + "", + "", + "", + "", + "", + "bplaced.de", + "", + "firm.ro", + "", + "", + "", + "", + "club.tw", + "", + "", + "", + "", + "", + "", + "", + "sakuraweb.com", + "", + "", + "", + "", + "coop.ht", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "hokksund.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gulen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fl\303\245.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fredrikstad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "shopitsite.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bplaced.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mayfirst.org", + "", + "better-than.tv", + "", + "m.se", + "kr\303\270dsherad.no", + "", + "cloudns.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hl.cn", + "", + "", + "", + "", + "", + "", + "", + "123hjemmeside.no", + "", + "", + "", + "soctrang.vn", + "", + "", + "", + "bashkiria.ru", + "", + "", + "", + "", + "ngrok-free.dev", + "", + "", + "", + "", + "", + "", + "", + "", + "engineer.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "boyfriend.jp", + "", + "", + "", + "", + "xx.gl", + "", + "", + "", + "", + "davvenj\303\241rga.no", + "", + "", + "", + "", + "", + "", + "123hjemmeside.dk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "firm.dk", + "", + "", + "", + "", + "", + "", + "", + "utwente.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "exnet.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kropyvnytskyi.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mi.it", + "mil.jo", + "", + "zapto.xyz", + "", + "", + "", + "", + "cloudapp.net", + "", + "", + "", + "", + "", + "", + "", + "vp4.me", + "", + "", + "", + "", + "", + "business.in", + "", + "gangaviika.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "campinas.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "konin.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "frogn.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cf-ipfs.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ms.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bihar.in", + "", + "", + "4lima.ch", + "", + "", + "", + "", + "", + "ve.it", + "", + "", + "", + "", + "", + "", + "bjugn.no", + "bacgiang.vn", + "", + "", + "", + "", + "", + "badaddja.no", + "cyon.link", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vt.it", + "", + "economia.bo", + "", + "", + "sejny.pl", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "space-to-rent.com", + "", + "", "", "", "", - "bergen.no", "", "", "", @@ -22906,7 +22813,31 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "embroidery.museum", + "", + "", + "blogspot.nl", + "", + "", + "", + "", + "", + "", + "", + "nikolaev.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "doomdns.com", "", "", "", @@ -22927,8 +22858,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "med.om", - "artsandcrafts.museum", + "", + "", + "", + "", + "", + "est-a-la-masion.com", + "", + "", + "mein-iserv.de", + "balat.no", "", "", "", @@ -22941,8 +22880,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "blogspot.li", "", - "org.tn", + "", + "", + "", + "", + "", + "", + "", + "airtraffic.aero", "", "", "", @@ -22959,10 +22906,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cri.nz", "", "", - "aerodrome.aero", + "", + "daegu.kr", + "", + "de.cool", "", "", "", @@ -22970,6 +22919,198 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "skaun.no", + "", + "", + "", + "", + "", + "florence.it", + "", + "", + "", + "", + "", + "", + "", + "fastlylb.net", + "", + "", + "myasustor.com", + "", + "", + "", + "", + "netgamers.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "skiptvet.no", + "", + "", + "", + "", + "", + "est-a-la-maison.com", + "", + "", + "sch.ng", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "k12.il", + "", + "", + "", + "", + "niigata.jp", + "", + "", + "blogspot.cl", + "", + "est-mon-blogueur.com", + "", + "", + "", + "", + "", + "", + "", + "steinkjer.no", + "", + "", + "quangbinh.vn", + "", + "", + "", + "", + "akrehamn.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vr.it", + "mil.mg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.kg", + "", + "", + "", + "floro.no", + "khmelnitskiy.ua", + "", + "", + "", + "\347\275\221\347\273\234.hk", + "", + "", + "", + "", + "\344\270\252\344\272\272.hk", + "", + "", + "", + "", + "", + "", + "", + "", + "bjarkoy.no", + "\347\266\262\347\273\234.hk", + "", + "", + "dedibox.fr", + "", + "spb.su", + "", + "", + "", + "shiftedit.io", + "\347\275\221\347\265\241.hk", + "", + "", + "", + "", + "\346\224\277\345\272\234.hk", + "", + "", + "", + "", + "firm.co", + "", + "", + "", + "", + "\347\266\262\347\265\241.hk", + "", + "", + "", + "", + "", + "", + "", + "flatanger.no", + "mosj\303\270en.no", + "", + "", + "", "", "", "", @@ -22995,16 +23136,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "freemyip.com", "", + "\345\205\254\345\217\270.hk", "", "", + "s.bg", "", "", "", "", "", "", - "accident-investigation.aero", "", "", "", @@ -23053,6 +23196,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "noip.us", "", "", "", @@ -23062,12 +23206,14 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ddnsgeek.com", "", + "sand\303\270y.no", "", + "herokussl.com", "", "", "", - "game-server.cc", "", "", "", @@ -23078,11 +23224,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "v\303\245gs\303\270y.no", - "med.pro", - "flor\303\270.no", "", - "org.ms", "", "", "", @@ -23092,11 +23234,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.bs", "", "", "", - "vf.no", "", "", "", @@ -23104,7 +23244,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nesodden.no", "", "", "", @@ -23114,15 +23253,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "spb.ru", + "servebbs.com", "", "", "", - "org.sv", "", "", "", "", - "org.mv", "", "", "", @@ -23130,19 +23269,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "economia.bo", "", "", "", - "scrysec.com", - "nx.cn", "", "", "", "", "", "", - "dynvpn.de", "", "", "", @@ -23170,14 +23305,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sassari.it", "", - "research.museum", "", "", + "hyllestad.no", "", "", "", - "org.ws", "", "", "", @@ -23191,10 +23326,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "chernigov.ua", "", "", "", - "nerdpol.ovh", "", "", "", @@ -23203,47 +23338,51 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "media.aero", "", "", "", - "if.ua", + "fylkesbibl.no", "", "", "", - "jx.cn", "", "", "", + "servebbs.net", "", "", "", "", "", + "coolblog.jp", "", "", "", "", "", + "meinforum.net", "", "", "", "", + "cloudcontrolapp.com", "", "", "", "", - "org.ee", "", + "forli-cesena.it", + "air-traffic-control.aero", "", "", - "virtual.museum", "", "", + "vv.it", "", "", "", "", - "pro.br", "", "", "", @@ -23257,14 +23396,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "coop.mw", "", "", + "mayfirst.info", "", "", - "mr.no", "", "", + "g\303\241ivuotna.no", "", + "gliding.aero", + "serveftp.com", "", "", "", @@ -23276,6 +23419,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mil.ac", "", "", "", @@ -23284,6 +23428,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "bari.it", "", "", "", @@ -23299,12 +23444,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "skr.jp", "", "", "", "", "", "", + "cloudycluster.net", "", "", "", @@ -23313,14 +23460,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "alwaysdata.net", "", "", "", "", - "beeldengeluid.museum", "", - "blogspot.am", - "ancona.it", "", "", "", @@ -23329,19 +23474,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "for-our.info", "", - "pmn.it", "", "", "", "", + "stat.no", "", "", "", "", "", "", + "goiania.br", "", "", "", @@ -23352,6 +23497,782 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "serveftp.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "square7.de", + "", + "", + "", + "", + "", + "", + "", + "", + "ulsan.kr", + "", + "", + "", + "", + "", + 
"", + "jele.cloud", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gj\303\270vik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servep2p.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "messina.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "knx-server.net", + "", + "", + "", + "square7.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cyon.site", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "yenbai.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "spacekit.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "web.do", + "", + "", + "", + "", + "", + "x443.pw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stjordal.no", + "", + "", + "", + "v\303\241rgg\303\241t.no", + "", + "", + "", + "", + "", + "", + "stranda.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "for-more.biz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "h\303\270nefoss.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "w.bg", + "", + "", + "", + "", + "", + "", + "casacam.net", + "", + "", + "", + "", + "", + "", + "", + "", + "coop.rw", + "", + "hitra.no", + "", + "", + "", + "", + "", + "groks-this.info", + "", + "", + "", + "", + "coop.km", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "co.education", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mytuleap.com", + "", + "", + "", + "", + "", + "", + "", + "from.hr", + "", + "", + "", + "", + "", + "mil.ec", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vc.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "drammen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "clerk.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns1.de", + "", + "", + "", + "", + "akamaiorigin-staging.net", + "", + "", + "", + "", + "", + "", + "hof.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barlettatraniandria.it", + "", + "biz.fj", + "", + "", + "jele.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "and\303\270y.no", + "", + "", + "cloudcontrolled.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "s\303\270r-varanger.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gyeonggi.kr", + "", + "notaires.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stor-elvdal.no", + "", + "", + "khanhhoa.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "balashov.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kongsvinger.no", + "", + "", + "", + "servebbs.org", + "", + "", + "", + "", + "", + "", + "", + "carbonia-iglesias.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "meraker.no", + "", + "", + "", + "", + "", + "", + "calabria.it", + "", + "", + "blogspot.al", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "framercanvas.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "name.na", + "myactivedirectory.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nome.pt", + "", + "", + "", + "", + "", + "", + "", + "", + "girlfriend.jp", "bashkiria.su", "", "", @@ -23367,6 +24288,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hanoi.vn", "", "", "", @@ -23375,19 +24297,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\351\271\277\345\205\220\345\263\266.jp", "", "", "", "", + "altoadige.it", "", "", + "boutir.com", "", "", "", "", "", - "org.rs", "", "", "", @@ -23402,11 +24324,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.et", "", "", "", - "msk.su", "", "", "", @@ -23415,61 +24335,52 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "communication.museum", - "communications.museum", - "forumz.info", "", "", + "serveftp.org", "", "", "", "", + "ashgabad.su", "", - "org.is", "", "", "", - "b.bg", "", "", + "channelsdvr.net", "", - "skien.no", + "synology-diskstation.de", "", "", - "of.by", "", "", "", "", "", - "oster\303\270y.no", - "inc.hk", "", "", "", - "org.sa", "", "", "", "", - "org.ma", "", "", "", - "doomdns.com", "", "", "", "", "", - "org.ba", "", "", "", + "accesscam.org", "", "", "", - "gen.nz", "", "", "", @@ -23487,7 +24398,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.ua", "", "", "", @@ -23507,18 +24417,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "baghdad.museum", "", "", "", "", "", + "golffan.us", "", "", + "marnardal.no", + "squares.net", "", "", "", - "fj.cn", "", "", "", @@ -23529,12 +24440,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "family.museum", "", "", "", "", - "wellbeingzone.eu", "", "", "", @@ -23544,18 +24453,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "blogspot.se", "", "", "", "", - "shacknet.nu", "", "", "", "", "", - "cherkassy.ua", "", "", "", @@ -23573,9 +24479,12 @@ const 
char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "gob.ve", "", - "yorkshire.museum", "", + "siteleaf.net", + "id.ly", + "greater.jp", "", "", "", @@ -23586,261 +24495,36 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "arna.no", "", "", "", "", "", - "is.it", - "org.jo", "", "", "", "", "", "", + "airkitapps-au.com", "", "", "", - "crimea.ua", "", "", "", "", "", "", - "denmark.museum", "", "", "", + "bulsan.it", + "zachpomor.pl", "", "", - "", - "sandnessjoen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.sb", - "", - "", - "", - "", - "", - "", - "", - "", - "org.ai", - "", - "", - "", - "", - "", - "com.bb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "me.uk", - "", - "webspace.rocks", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "arboretum.museum", - "", - "", - "", - "alvdal.no", - "", - "org.ps", - "edu.sb", - "", - "stj\303\270rdal.no", - "", - "pro.om", - "mo\303\245reke.no", - "", - "", - "", - "", - "", - "", - "rovno.ua", - "", - "", - "edu.bb", - "or.jp", - "blogspot.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "msk.ru", - "", - "", - "", - "washingtondc.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.ae", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyn-ip24.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "f\303\270rde.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.sk", - "", - "", - "", - "", - "", - "", - "ks.us", - "", - "", - "", - "or.ke", - "", - "", - "augustow.pl", - "", - "durham.museum", - "", - "", - "", - "", - "", - "nombre.bo", - "", - "", - "", - "", - "", - "", - "", - "gx.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pro.pr", - "", - "", + "mil.ba", "", "", "", @@ -23851,6 +24535,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "byen.site", "", "", "", @@ -23866,10 +24551,45 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "noticias.bo", "", "", - "now-dns.top", + "", + "", + "", + "dp.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "ecologia.bo", + "cloudns.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "noip.me", + "vik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ind.kw", "", "", "", @@ -23891,6 +24611,702 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "va.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dnsup.net", + "for-our.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "int.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "drangedal.no", + "", + "", + "", + "", + "", + "", + "", + "univ.sn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "canva-apps.com", + "", + "", + 
"ascolipiceno.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "agrigento.it", + "", + "", + "", + "", + "", + "", + "mycloud.by", + "", + "", + "", + "", + "", + "", + "", + "int.mw", + "biella.it", + "", + "", + "", + "", + "", + "", + "biz.vn", + "", + "", + "catfood.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudns.club", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "2ix.ch", + "vi.it", + "", + "", + "feste-ip.net", + "", + "", + "", + "", + "12hp.at", + "", + "", + "", + "", + "", + "", + "", + "int.co", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "certification.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "delhi.in", + "", + "", + "", + "", + "", + "", + "int.ru", + "damnserver.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "campidanomedio.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.eg", + "chips.jp", + "", + "", + "", + "", + "", + "andriatranibarletta.it", + "", + "", + "", + "", + "", + "", + "", + "fuoisku.no", + "", + "", + "", + "vs.it", + "cahcesuolo.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servehalflife.com", + "", + "", + "", + "fukuoka.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bialowieza.pl", + "", + "", + "", + "", + "ballooning.aero", + "", + "", + "", + "", + "", + "bearalvahki.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "molde.no", + "", + "", + "", + "", + "", + "", + "", + "", + "noho.st", + "", + "", + "", + "blogspot.be", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sch.zm", + "", + "", + "", + "", + "idv.tw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "malselv.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "frei.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "halsa.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mydatto.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "moareke.no", + "", + "", + "airport.aero", + "", + "", + "", + "va.it", + "", + "", + "sch.wf", + "", + "altervista.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyn-berlin.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "staba.jp", + "", + "", + "", + "", + "", + "", + "ngrok.pizza", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "homeftp.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "koobin.events", + "", + "", + "", + "", + "", + "", + "", + "", + "ask\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloud.goog", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.do", + "", + "", + "cocotte.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "sardegna.it", @@ -23899,6 +25315,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "serveblog.net", "", "", "", @@ -23906,2621 +25323,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "foundation.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.in", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.events", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blackbaudcdn.net", - "", - "jeonnam.kr", - "", - "", - "", - "org.ls", - "", - "", - "", - "", - "org.je", - "", - "", - "", - "servegame.com", - "", - "", - "", - "", - "archaeology.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.pa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.lv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mod.gi", - "imb.br", - "", - "", - "", - "", - "", - "", - "oristano.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.do", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.dm", - "", - "v\303\241rgg\303\241t.no", - "outsystemscloud.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "isa-geek.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vn.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "slattum.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cadaques.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fosnes.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.sc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "exeter.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "coop.mv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mus.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"m\303\245lselv.no", - "est-a-la-masion.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "environmentalconservation.museum", - "", - "", - "", - "", - "", - "", - "correios-e-telecomunica\303\247\303\265es.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oh.us", - "sandnessj\303\270en.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "naturalhistory.museum", - "", - "", - "", - "bmoattachments.org", - "", - "naturalhistorymuseum.museum", - "", - "bokn.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.la", - "", - "", - "", - "", - "rep.kp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "essex.museum", - "", - "", - "net.sb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "net.bb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "video.hu", - "", - "", - "bialowieza.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyr\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "carraramassa.it", - "", - "balat.no", - "", - "now-dns.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barsyonline.com", - "", - "", - "", - "", - "", - "", - "", - "", - "ringerike.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "game-host.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "elvendrell.museum", - "", - "", - "", - "", - "", - "badaddja.no", - "", - "", - "", - "", - "", - "kvalsund.no", - "", - "", - "", - "", - "", - "", - "", - "", - "dnepropetrovsk.ua", - "km.ua", - "com.lb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "exnet.su", - "", - "", - "", - "", - "", - "damnserver.com", - "stryn.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "daegu.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.lb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "association.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "federation.aero", - "", - "", - "komforb.se", - "", - "", - "", - "", - "ddnsking.com", - "", - "pro.tt", - "", - "", - "", - "github.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "agrigento.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "modum.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "it.ao", - "", - "", - "", - "", - "", - "mat.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "endofinternet.net", - "", - "", - 
"", - "", - "", - "samnanger.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "no-ip.net", - "", - "", - "mar.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "readmyblog.org", - "", - "", - "", - "", - "", - "", - "press.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nikolaev.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "poa.br", - "arts.co", - "", - "", - "", - "skydiving.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "kristiansund.no", - "", - "", - "", - "", - "fukui.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kustanai.ru", - "", - "", - "", - "fortmissoula.museum", - "", - "", - "", - "", - "presse.km", - "", - "", - "", - "", - "", - "brumunddal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "wedeploy.sh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "journal.aero", - "cloudns.in", - "", - "", - "", - "", - "", - "blogspot.sn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.vn", - "", - "", - "", - "", - "", - "", - "", - "charter.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sellsyourhome.org", - "", - "basicserver.io", - "", - "", - "", - "", - "", - "", - "", - "", - "k.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "est-mon-blogueur.com", - "press.se", - "brandywinevalley.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bologna.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "usgarden.museum", - "", - "", - "", - "", - "stavanger.no", - "", - "mo.us", - "", - "blogspot.si", - "", - "", - "", - "", - "", - "", - "", - "md.us", - "", - "", - "", - "\303\241laheadju.no", - "perso.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "unjarga.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kr.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.lc", - "", - "", - "bjugn.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyroy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pvt.ge", - "", - "", - "", - "rost.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vik.no", - "", - "", - "", - "", - "", - "skjervoy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "ot.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kommune.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "gj\303\270vik.no", - "", - "", - "\304\215\303\241hcesuolo.no", - "", - "", - "", - "golffan.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "est-a-la-maison.com", - "", - "", - "", - "", - "", - "", - "", - "me.us", - "", - "", - "", - "", - "", - "", - "", - "", - "isa-geek.com", - "", - "", - "", - "", - "", - "", - "", - "florence.it", - "", - "", - "", - "", - "", - "", - "dielddanuorri.no", - "", - "net.lb", - "research.aero", - "", - "dnipropetrovsk.ua", - "", - "", - "", - "", - "", - "drangedal.no", - "", - "", - "", - "", - "", - "", - "", - "cloudapp.net", - "", - "", - "", - "", - "", - "floripa.br", - "", - "", - "", - "", - "", - "", - "", - "", - "bialystok.pl", - "", - "", - "", - "", - "i.ph", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "volda.no", - "", - "", - "", - "", - "", - "", - "", - "mt.us", - "", - "", - "", - "", - "roan.no", - "", - "", - "", - "", - "", - "cesenaforl\303\254.it", - "", - "rahkkeravju.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fastlylb.net", - "", - "", - "", - "syncloud.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "amber.museum", - "", - "", - "", - "", - "", - "", - "", - "ddnsgeek.com", - "", - "", - "", - "", - "", - "", - "org.qa", - "", - "", - "", - "", - "", - "med.ee", - "", - "", - "", - "", - "", - "", - "balsfjord.no", - "cloudeity.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "salvadordali.museum", - "", - "", - "", - "", - "", - "", - "", - "bryansk.su", - "", - "", - "", - "", - "uscountryestate.museum", - "", - "", - "", - "aktyubinsk.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "student.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "or.it", - "naval.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cyber.museum", - "", - "", - "", - "", - "", - "", - "southwest.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "familyds.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "santamaria.br", - "", - "", - "blogspot.lu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "filegear-sg.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sp.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "psi.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "columbia.museum", - "", - "", - "", - "", - "", - "", - "", - "med.sa", - "", - "childrensgarden.museum", - "nhlfan.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "busan.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "casadelamoneda.museum", - "", - "", - "blogspot.rs", - "", - "", - "googleapis.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "store.nf", - "rackmaze.net", - "", 
- "", - "", - "", - "in.th", - "", - "ethnology.museum", - "", - "", - "", - "", - "", - "", - "", - "skj\303\245k.no", - "", - "", - "gran.no", - "", - "", - "", - "", - "", - "", - "", - "beiarn.no", - "", - "", - "", - "", - "", - "uzhgorod.ua", - "dscloud.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ltd.cy", - "", - "", - "", - "", - "", - "", - "idf.il", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aosta-valley.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "freeddns.us", - "", - "", - "", - "", - "", - "", - "dyndns1.de", - "", - "", - "", - "", - "", - "", - "", - "", - "synology.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "wolomin.pl", - "", - "navigation.aero", - "", - "", - "gemological.museum", - "", - "", - "", - "", - "swinoujscie.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "4lima.ch", - "", - "", - "", - "", - "", - "", - "aurskog-h\303\270land.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "internet-dns.de", - "", - "", - "", - "nationalfirearms.museum", - "", - "", - "", - "", - "", - "fhapp.xyz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "illustration.museum", - "ufcfan.org", - "spdns.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\345\200\213\344\272\272.\351\246\231\346\270\257", - "", - "", - "biz.vn", - "", - "", - "", - "", - "", - "akrehamn.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.gr", - "", - "", - "mn.us", - "", - "gouv.ml", - "", - "", - "", - "", - "", - "", - "lom.it", - "", - "gouv.sn", - "", - "", - "rnrt.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "svelvik.no", - "", - "", - "com.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "perso.tn", - "", - "", - "", - "", - "", - "", - "slask.pl", - "pr.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mi.us", - "", - "francaise.museum", - "", - "", - "", - "edu.zm", - "ltd.uk", - "", - "", - "", - "", - "", - "", - "", - "", - "pro.mv", - "", - "sirdal.no", - "", - "", - "", - "blogspot.is", - "", - "", - "", - "", - "", - "", - "pug.it", - "", - "", - "", - "", - "", - "", - "", - "", - "storfjord.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "freeddns.org", - "", - "", - "", - "", - "glogow.pl", - "", - "", - "", - "", - "biz.az", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "keymachine.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "contagem.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gyeongnam.kr", - "", - "", - "", - "", - "", - "gyeongbuk.kr", - "ddnss.org", - "", - "", - "", - "figueres.museum", - "", - "", - "alto-adige.it", - "", - "", - "blogspot.lt", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gitlab.io", - "", - "nord-aurdal.no", - "", - "", - "voorloper.cloud", - "", - "", - "", - "", - "", - "", - "dnshome.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "med.pa", - "", - "sb.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "shiftedit.io", - "", - "", - "kustanai.su", - "", - "", - "", - "", - "klepp.no", - "", - "", - "", - "", - "", - "", - "goiania.br", - "", - "", - "", - "ownip.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "piedmont.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "broadcast.museum", - "", + "name.hr", "dnsupdater.de", - "ushistory.museum", - "", - "", - "pe.ca", - "g\303\241ivuotna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", + "divttasvuotna.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.ch", "", "", "", @@ -26536,6 +25356,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "goip.de", "", "", "", @@ -26591,21 +25412,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mol.it", - "andasuolo.no", + "svelvik.no", + "my.id", "", "", "", "", - "org.sg", "", "", "", "", - "org.mg", "", "", - "vaksdal.no", "", "", "", @@ -26617,39 +25435,33 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.kg", "", "", "", "", - "ks.ua", "", "", "", "", "", - "siljan.no", + "ind.br", "", "", "", "", "", "", - "serveftp.net", "", - "org.ug", "", "", "", "", - "kv.ua", "", "", "", "", "", "", - "dvrcam.info", "", "", "", @@ -26657,13 +25469,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "int.pt", "", - "gov.sb", "", "", "", "", - "mil.bo", "", "", "", @@ -26673,32 +25484,30 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "gov.bb", "", "", "", "", "", "", + "bulsan-s\303\274dtirol.it", "", "", "", "", + "grozny.ru", "", "", "", + "mil.no", "", - "web.ve", "", "", - "niigata.jp", "", "", - "varese.it", "", "", "", - "mil.km", "", "", "", @@ -26712,6 +25521,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mydatto.net", + "kv\303\246nangen.no", "", "", "", @@ -26739,7 +25550,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "blogspot.hu", "", "", "", @@ -26754,14 +25564,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sveio.no", "", - "indiana.museum", "", "", - "mincom.tn", "", - "fr\303\246na.no", "", + 
"\345\214\227\346\265\267\351\201\223.jp", "", "", "", @@ -26771,6 +25580,443 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "mcpre.ru", + "ullensvang.no", + "", + "", + "", + "", + "", + "", + "", + "sicilia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.fi", + "", + "gyeongnam.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vikna.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vestnes.no", + "", + "", + "", + "", + "", + "zgorzelec.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "siracusa.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ddnsfree.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stryn.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sklep.pl", + "", + "", + "int.tt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sch.tf", + "", + "", + "", + "", + "", + "", + "firm.ht", + "", + "canva-apps.cn", + "", + "", + "", + "accident-investigation.aero", + "", + "", + "dagestan.ru", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "granvin.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.cl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "b\303\270mlo.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "s\303\270ndre-land.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "navigation.aero", + "b\303\246rum.no", + "", + "", + "mi.th", + "", + "int.lk", + "", + "", + "", + "", + "flynnhosting.net", + "", + "merseine.nu", + "saitama.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "moonscale.net", + "", + "", + "cosenza.it", + "friulivgiulia.it", + "", + "", + "", + "yalta.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "small-web.org", + "", + "", + "wpmudev.host", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "salvador.br", + "", + "", + "", + "", + "", + "", + "", + "m\303\245s\303\270y.no", "basilicata.it", "", "", @@ -26796,2450 +26042,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "familyds.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"square.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.sy", - "", - "", - "", - "", - "mil.my", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.by", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.co", - "veneto.it", - "campidano-medio.it", - "", - "", - "mil.uy", - "", - "", - "fukuoka.jp", - "", - "", - "", - "", - "rivne.ua", - "org.ac", - "", - "", - "", - "", - "org.es", - "net.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ehime.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bible.museum", - "", - "", - "", - "", - "", - "mosjoen.no", - "", - "ecologia.bo", - "", - "", - "", - "", - "", - "", - "", - "mil.ru", - "int.ve", - "", - "", - "ve.it", - "", - "", - "", - "", - "", - "firewall-gateway.com", - "", - "", - "rybnik.pl", - "kg.kr", - "", - "", - "", - "2ix.ch", - "", - "", - "", - "", - "bozen.it", - "", - "", - "", - "", - "rackmaze.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "spb.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "incheon.kr", - "", - "", - "", - "", - "", - "mil.id", - "", - "", - "", - "", - "", - "org.hu", - "", - "", - "", - "", - "", - "mil.rw", - "", - "", - "", - "", - "", - "", - "", - "", - "vt.it", - "", - "synology-diskstation.de", - "", - "", - "", - "", - "", - "", - "", - "", - "k\303\245fjord.no", - "", - "", - "", - "dolls.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ltd.lk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "of.london", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "casacam.net", - "", - "", - "", - "chernigov.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "parti.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "isa-geek.org", - "kristiansand.no", - "", - "", - "", - "portland.museum", - "", - "", - "", - "imageandsound.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pe.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firewall-gateway.de", - "blogspot.qa", - "", - "", - "", - "midsund.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ms.us", - "", - "archaeological.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "botany.museum", - "", - "", - "", - "", - "siracusa.it", - "", - "", - "gouv.fr", - "", - "", - "", - "univ.sn", - "", - "", - "", - "", - "", - "aomori.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gangaviika.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudycluster.net", - "", - "", - "firewall-gateway.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.st", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sor-aurdal.no", - "", - "", - "", - "", - "pri.ee", - "", - "", - "", - "for-more.biz", - "", - "", - "", - "spb.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "schokoladen.museum", - "", - "", - "", - "", - "", - "", - "psc.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "curitiba.br", - "vr.it", - "", - "", - "", - "", - "slupsk.pl", - "id.lv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dagestan.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ma.us", - "", - "", - "", - "", - "santafe.museum", - "", - "", - "", - "chiba.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "montreal.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.hk", - "", - "", - "", - "", - "", - "", - "", - "gob.ve", - "", - "", - "", - "", - "mil.py", - "", - "", - "", - "", - "steinkjer.no", - "", - "", - "", - "chirurgiens-dentistes.fr", - "utwente.io", - "", - "", - "", - "", - "", - "", - "", - "", - "chirurgiens-dentistes-en-france.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cesenaforli.it", - "", - "", - "", - "", - "", - "", - "gouv.ht", - "", - "", - "read-books.org", - "", - "", - "", - "", - "", - "", - "", - "uhren.museum", - "", - "", - "", - "coloradoplateau.museum", - "", - "", - "", - "noip.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cheltenham.museum", - "", - "", - "", - "", - "africa.com", - "", - "", - "", - "", - "grandrapids.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "raholt.no", - "", - "civilization.museum", - "", - "", - "", - "", - "", - "k12.ec", - "", - "", - "", - "", - "", - "", - "", - "", - "ooguy.com", - "", - "", - "", - "notaires.fr", - "", - "", - "", - "", - "", - "bellevue.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.lb", - "", - "r\303\270d\303\270y.no", - "", - "", - "", - "", - "", - "", - "org.gg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "spacekit.io", - "", - "", - "", - "", - "org.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "academia.bo", - "", - "per.la", - "", - "", - "", - "", - "", - "", - "", - "hm.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mad.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hu.com", - "", - "blogspot.li", - "", - "", - "", - "mil.pe", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.hk", - "aure.no", - "", - "", - "", - "", - "atlanta.museum", - "", - "", - "", - "", - "", - "", - "canada.museum", - "synology-ds.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vi.it", - "", - "", - "geelvinck.museum", - "", - "", - "", - "", - "", - "", - "pa.us", - "", - "", - "", - "", - "", - "", - "", - "chattanooga.museum", - "", - "", - "", - "", - "", - "certification.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.nr", - "", - "", - "", - "", - "", - "", - "", - 
"presidio.museum", - "", - "", - "", - "", - "", - "muncie.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "serveftp.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vinnica.ua", - "", - "", - "", - "blogspot.cv", - "salem.museum", - "", - "", - "", - "", - "davvenj\303\241rga.no", - "", - "naturbruksgymn.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "servebbs.net", - "", - "", - "", - "", - "", - "", - "", - "dnsdojo.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "calabria.it", - "", - "", - "", - "", - "", - "", - "mil.br", - "", - "s\303\270mna.no", - "", - "", - "mil.kr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "press.cy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "karelia.su", - "", - "", - "", - "", - "", - "camdvr.org", - "", - "", - "", - "", - "judygarland.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "apigee.io", - "", - "", - "", - "", - "stranda.no", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.hr", - "forlicesena.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "elburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hk.com", - "", - "", - "", - "k12.il", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.ec", - "", - "", - "", - "ravenna.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dray-dns.de", - "", - "steam.museum", - "", - "vestby.no", - "", - "", - "", - "", - "", - "", - "k\303\241r\303\241\305\241johka.no", - "piemonte.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.ge", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "muenster.museum", - "", - "", - "", - "", - "", - "coop.ht", - "", - "", - "", - "", - "", - "f.bg", - "", - "", - "", - "", - "venice.it", - "", - "nflfan.org", - "", - "", - "bergbau.museum", - "pors\303\241\305\213gu.no", - "", - "", - "", - "perso.sn", - "", - "", - "varggat.no", - "", - "blogspot.ca", - "", - "", - "", - "cahcesuolo.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rome.it", - "", - "", - "", - "", - "", - "", - "biz.tj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.sl", - "", - "", - "", - "", - "org.ml", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sejny.pl", - "", - "", - "blogspot.fi", - "", - "", - "", - "", - "", - "", - "", - "", - "vercelli.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "presse.ml", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "yalta.ua", - "", - "", - "", - "", - "mil.gt", - "", - "", - "", - "", - "", - "agric.za", - "penza.su", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mk.ua", - "", - "", - "", - "endofinternet.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stpetersburg.museum", - "", - "vads\303\270.no", - "saintlouis.museum", - "", - "", - "", - "", - "", - "", - "daejeon.kr", - "", - "", - "", - "or.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vindafjord.no", - "", - "", - "", - "alwaysdata.net", - "", - "", - "", - "", - "int.vn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "m.se", - "dyndns.ws", - "", - "", - "", - "", - "", - "", - "", - "", - "vaapste.no", - "", - "ulsan.kr", - "", - "", - "", - "genova.it", - "", - "", - "", - "", - "dyndns.tv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "com.mx", - "", - "", - "", - "", - "", - "etne.no", - "", - "", - "", - "", - "", - "", - "", - "", - "ltd.gi", - "sosnowiec.pl", - "", - "", - "", - "", - "", - "", - "", - "vestnes.no", - "", - "ip6.arpa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "n\303\246r\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "edu.mx", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.il", - "", - "", - "", - "", - "", - "missoula.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "int.az", - "", - "", - "familyds.org", - "", - "", - "", - "", - "zachpomor.pl", - "", - "", - "", - "", - "", - "", - "ms.kr", - "", - "", - "", - "accesscam.org", - "kautokeino.no", - "", - "", - "", - "", - "", - "forli-cesena.it", - "", - "", - "", - "", - "", - "", - "", - "webhop.me", - "org.ni", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "omaha.museum", - "", - "", - "", - "", - "", - "", - "", - "cupcake.is", - "", - "", - "", - "", - "", - "", - "", - "valle.no", - "", - "", - "", - "", - "", - "flatanger.no", - "", - "", - "", - "", - "dnsdojo.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kr.it", - "idrett.no", - "", - "", - "", - "", - "", - "blogspot.ug", - "ambulance.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "h\303\241pmir.no", - "", - "", - "name.et", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "issmarterthanyou.com", - "", - "mil.to", - "grue.no", - "", - "", - "", - "", - "", - "casino.hu", - "", - "", - "", - "", - "", - "", - "", - "kep.tr", - "", - "", - "", - "", - "", - "arts.nf", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.tm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vs.it", - "", - "name.tt", - "", - 
"", - "", - "", - "", - "", - "og.it", - "", - "mil.tw", - "", - "", - "", - "", - "", - "", - "", - "dagestan.su", - "vv.it", - "sassari.it", - "klabu.no", - "dp.ua", - "", - "org.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "p.se", - "", - "", - "", - "", - "", - "", - "", - "mansions.museum", - "", - "", - "", - "nym.sx", - "", - "", - "", - "", - "nym.mx", - "", - "", - "", - "eigersund.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "koebenhavn.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vard\303\270.no", - "", - "usercontent.jp", - "", - "", - "", - "kommunalforbund.se", - "barcelona.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.in", - "", - "", - "", - "", - "", - "", - "", - "s.bg", - "", - "", - "h\303\241bmer.no", - "", - "", - "", - "or.th", - "", - "", - "", - "gliding.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.hn", - "", - "", - "", - "", - "", - "coop.mw", - "", - "", - "oregon.museum", - "", - "", - "", - "", - "", - "", - "prd.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "alaheadju.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kunstunddesign.museum", - "", - "", - "", - "", - "va.it", - "", - "", - "", - "", - "indian.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "servegame.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "servebbs.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\303\270ystre-slidre.no", - "", - "name.tr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "openair.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stufftoread.com", - "blogspot.pe", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kunst.museum", - "", - "", - "", - "", - "", - "", - "endoftheinternet.org", - "", - "", - "", - "", - "", - "", - "gov.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "saitama.jp", - "", - "", - "", - "", - "", - "", - "", - "xs4all.space", - "suldal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gov.zw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudns.info", - "", - "", - "", - "", - "", - "", - "", - "", - "law.pro", - "", - "", - "", - "", - "", - "", - "", - "org.gl", - "", - "", "stathelle.no", "", "", "", "", "", + "sanok.pl", "", "", "", "", "", "", + "2038.io", "", - "principe.st", "", "", "", - "org.ag", + "\303\241lt\303\241.no", + "mytabit.com", "", "", + "its.me", "", "", "", @@ -29251,435 +26076,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aquarium.museum", - "aurskog-holand.no", - "net.mx", - "", - "", - "", - "", - "or.at", - "blogspot.pt", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "zoological.museum", - "", - "", - "", - "", - "", - "", - "pol.tr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "noip.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stateofdelaware.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vc.it", - "", - "", - "sigdal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "plc.uk", - "", - "", - "swidnik.pl", - "", - "", - "", - "surgeonshall.museum", - "", - "", - "", - "", - "", - "", - "webhop.info", - "varoy.no", - "", - "", - "", - "", - "", - "", - "valleeaoste.it", - "gouv.km", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "como.it", - "", - "", - "", - "ambulance.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "name.mk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "skjak.no", - "", - "", - "fuoisku.no", - "", - "mytis.ru", - "mer\303\245ker.no", - "lind\303\245s.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aero.tt", - "space.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "coop.rw", - "", - "", - "vpnplus.to", - "", - "", - "", - "", - "", - "", - "mansion.museum", - "", - "", - "", - "off.ai", - "", - "", - "", - "myvnc.com", - "", - "", - "", - "", - "", - "", - "", - "stjordal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "s\303\270r-varanger.no", - "", - "", - "", - "", - "", - "mo.cn", - "", - "", - "", - "dnsfor.me", - "", - "", - "", - "", - "", - "", - "", - "", - "serveftp.org", - "vladimir.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "artanddesign.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "guovdageaidnu.no", - "", - "dyn-berlin.de", - "arts.ve", - "", - "", - "", - "vantaa.museum", - "", - "", - "", - "", - "", - "", - "", - "delmenhorst.museum", - "s\303\270rreisa.no", - "", - "", - "", - "", - "", - "civilaviation.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "prd.mg", - "", - "", - "engineer.aero", - "", - "", - "", - "", - "carrara-massa.it", - "", - "", - "", - "", - "", - "", - "", - "sklep.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -29694,8 +26090,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "dep.no", "", "", + "enna.it", "", "", + "name.tj", "", "", "", @@ -29705,6 +26103,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "syncloud.it", "", "", "", @@ -29712,9 +26111,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "soc.dz", "", "", - "gouv.ci", + "nesseby.no", "", "", "", @@ -29722,19 +26122,16 @@ const 
char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "engine.aero", "", "", "", "", "", - "mil.ae", "", "", "", "", "", - "museum.tt", "", "", "", @@ -29751,7 +26148,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sanok.pl", "", "", "", @@ -29759,13 +26155,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "east-kazakhstan.su", "", "", "", "", + "konyvelo.hu", "", + "quicksytes.com", "", + "filegear-de.me", "", "", "", @@ -29773,10 +26171,288 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "krasnik.pl", + "", + "narvik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kuron.jp", + "wellbeingzone.eu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.pl", + "", + "", + "", + "nedre-eiker.no", + "", + "alta.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "avocat.pro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "s\303\270rreisa.no", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.qa", + "", + "", + "", + "", + "", + "", + "", + "bozen-sudtirol.it", + "vladimir.ru", + "", + "int.mv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sc.tz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "streamlitapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ddnsking.com", + "", + "holtalen.no", + "", + "", + "", + "", + "name.jo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "simplesite.pl", + "", + "", + "int.cv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bulsan-suedtirol.it", + "", + "", + "", + "", + "", + "forlicesena.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "schokokeks.net", + "", + "", + "", + "id.us", "vgs.no", "", "", - "pyatigorsk.ru", + "appspacehosted.com", "", "", "", @@ -29790,15 +26466,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "uni5.net", "", "", "", + "bib.ve", "", "", "", "", - "plc.ly", "", "", "", @@ -29807,7 +26482,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "merseine.nu", "", "", "", @@ -29817,6 +26491,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "catania.it", "", "", "", @@ -29825,18 +26500,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "avocat.fr", + "12hp.de", "", "", "", + "ybo.faith", "", "", "", + "dscloud.me", "", "", "", "", "", "", + "gamvik.no", "", "", "", @@ -29858,11 +26538,1362 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "per.sg", "", "", "", - 
"encyclopedic.museum", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "skydiving.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gujarat.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "firm.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vercelli.it", + "", + "", + "", + "sex.hu", + "", + "kongsberg.no", + "", + "", + "", + "", + "", + "", + "", + "", + "federation.aero", + "", + "", + "", + "", + "", + "cantho.vn", + "", + "", + "", + "", + "", + "", + "", + "voorloper.cloud", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogdns.org", + "", + "", + "", + "", + "", + "", + "", + "", + "in.net", + "", + "", + "freebox-os.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "uzhgorod.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "dielddanuorri.no", + "", + "", + "mintere.site", + "equipment.aero", + "", + "\303\270vre-eiker.no", + "", + "", + "", + "", + "", + "", + "", + "bolivia.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "skjerv\303\270y.no", + "easypanel.host", + "", + "", + "volda.no", + "salerno.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "in.ni", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "odo.br", + "", + "", + "", + "me.in", + "", + "", + "", + "hs.zone", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "valleeaoste.it", + "", + "", + "", + "", + "", + "", + "", + "\303\270rsta.no", + "", + "", + "", + "", + "gialai.vn", + "", + "gorizia.it", + "", + "", + "app.gp", + "", + "", + "", + "mil.qa", + "campobasso.it", + "", + "", + "", + "mo-siemens.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hungyen.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudjiffy.net", + "", + "", + "dr.in", + "", + "", + "cheap.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ind.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "coop.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "freebox-os.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "azure-mobile.net", + "", + "", + "", + "", + "", + "", + "", + "or.cr", + "", + "", + "ngrok.app", + "", + "", + "", + "", + "vinnica.ua", + "", + "binhphuoc.vn", + "int.ar", + "", + "", + "guovdageaidnu.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dienbien.vn", + "", + "", + "mydobiss.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nowaruda.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "abkhazia.su", + "", + "", + "", + "", + "cloudns.biz", + "", + "", + "", + "", + "", + "andriabarlettatrani.it", + "", + "", + "", + "", + "", + "", + "knowsitall.info", + "", + "", + "neko.am", + "kurgan.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kl\303\246bu.no", + "", + "", + "", + "", + "int.in", + "", + "", + "", + "", + "", + "", + "ind.gt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asso.ht", + "", + "", + "carraramassa.it", + "", + "", + "", + "jambyl.su", + "", + "", + "", + "", + "", + "", + "", + "name.pr", + "", + "", + "", + "", + "", + "", + "balsan.it", + "", + "", + "", + "", + "", + "", + "authgearapps.com", + "", + "mil.al", + "", + "", + "", + "", + "mil.hn", + "", + "", + "", + "", + "", + "", + "emergency.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "geometre-expert.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "game.tw", + "", + "", + "", + "myspreadshop.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jeez.jp", + "", + "kaas.gg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "clickrising.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mcdir.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyroy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.dk", + "", + "", + "", + "", + "", + "grozny.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ciscofreak.com", + "", + "auto.pl", + "com.mx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mb.ca", + "", + "", + "", + "sumy.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blog.kg", + "", + "oy.lc", + "", + "", + "", + "", + "", + "edu.mx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hjartdal.no", + "musician.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.se", + "", + "", + "", + "bievat.no", + "", + "", + "asso.re", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.mu", + "", + "", + "sunnyday.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vaksdal.no", + "mordovia.ru", + "basicserver.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "architectes.bj", + "", + "wmcloud.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "own.pm", + "", + "", + "", + "", + "", + "ullensaker.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "1337.pictures", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "180r.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hanam.vn", + "h.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "myftp.org", + "in.us", + "", + "nesset.no", + "", + "", + "coop.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "servebeer.com", + "", + "", + "", + "", + "", + "", + "", + "", + "ind.tn", + "gentlentapis.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "skien.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "honefoss.no", + "", + "", + "", + "", + "", + "", + "", + "", + "dagestan.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "haiduong.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "beagleboard.io", + "", + "", + "campidano-medio.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "12hp.ch", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "h\303\241mm\303\241rfeasta.no", + "", + "", + "biz.bb", + "", + "", + "", + "", + "", + "", + "", + "", + "fashionstore.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "divtasvuodna.no", + "dynserv.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "net.mx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "name.eg", + "", + "", + "", + "", + "", + "", + "backan.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aus.basketball", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -29874,10 +27905,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "vologda.su", "", - "flynnhosting.net", "", "", - "me.ke", "", "", "", @@ -29886,16 +27915,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ha.no", - "org.pf", "", + "bolzano-altoadige.it", "", "", "", + "\303\245rdal.no", "", "", "", - "drobak.no", "", "", "", @@ -29905,11 +27933,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "m\303\241latvuopmi.no", "", "", + "zhytomyr.ua", "", "", "", + "evenes.no", "", "", "", @@ -29918,12 +27949,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "med.ht", "", "", + "movimiento.bo", + "friulive-giulia.it", "", "", + "heavy.jp", + "vf.no", "", + "her.jp", "", "", "", @@ -29932,20 +27967,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "webhop.net", - "mil.tr", "", "", "", "", - "ltd.ua", "", "", + "supabase.co", "", "", "", "", + "!www.ck", "", + "blogspot.cz", "", "", "", @@ 
-29953,6 +27988,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "bedzin.pl", "", "", "", @@ -29960,6 +27996,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "beep.pl", "", "", "", @@ -29967,7 +28004,1412 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stj\303\270rdalshalsen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "binhdinh.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sex.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "balsan-s\303\274dtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "supabase.net", + "", + "", + "", + "", + "mil.sh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myeffect.net", + "", + "", + "", + "", + "", + "", + "", + "", + "org.mu", + "", + "", + "", + "", + "org.so", + "", + "", + "", + "", + "org.mo", + "", + "", + "", + "", + "", + "", + "", + "vladimir.su", + "", + "org.bo", + "", + "", + "", + "", + "org.sd", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "exchange.aero", + "", + "", + "", + "noor.jp", + "", + "", + "", + "", + "", + "org.bm", + "", + "", + "", + "", + "org.km", + "", + "", + "", + "ie.ua", + "", + "gaular.no", + "", + "", + "copro.uk", + "civilaviation.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.mw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.bw", + "", + "", + "", + "", + "org.kw", + "", + "123website.lu", + "org.im", + "", + "", + "", + "", + "", + "", + "v.ua", + "", + "", + "", + "", + "", + "", + "", + "mosjoen.no", + "", + "", + "", + "", + "", + "", + "dev.vu", + "", + "", + "", + "", + "org.cu", + "org.mk", + "", + "", + "", + "endofinternet.net", + "", + "", + "", + "", + "org.co", + "utazas.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "arte.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ru", + "", + "", + "", + "", + "", + "", + "", + "", + "serveminecraft.net", + "org.ro", + "", + "", + "", + "", + "", + "", + "", + "daejeon.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "virtualuser.de", + "", + "", + "", + "", + "", + "", + "org.cw", + "", + "skjak.no", + "web.ni", + "", + "", + "", + "", + "", + "co.events", + "", + "", + "cloud66.ws", + "", + "", + "", + "", + "", + "", + "", + "org.sy", + "", + "name.vn", + "", + "", + "org.my", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sx.cn", + "", + "org.ky", + "sosnowiec.pl", + "", + "", + "filegear-ie.me", + "org.rw", + "", + "", + "", + "", + "org.se", + "", + "", + "", + "", + "org.me", + "", + "", + "", + "", + "org.uy", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "in.na", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.bg", + "varggat.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kumamoto.jp", + "", + "", + "xnbay.com", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "barsyonline.com", + "", + "", + "", + "", + "", + "", + "", + "blogspot.cf", + "", + "lk3.ru", + "gdansk.pl", + "org.st", + "", + "", + "", + "", + "org.mt", + "", + "", + "", + "", + "org.cy", + "", + "123website.be", + "", + "", + "org.bt", + "", + "", + "", + "", + "", + "homelink.one", + "", + "", + "", + "", + "association.aero", + "", + "", + "", + "", + "", + "boxfuse.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "edugit.io", + "", + "", + "", + "", + "", + "", + "", + "wnext.app", + "", + "", + "", + "", + "chambagri.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cable-modem.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.ci", + "", + "", + "mil.ph", + "", + "", + "", + "", + "", + "ltd.uk", + "", + "", + "gdynia.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "filegear-au.me", + "", + "", + "", + "", + "", + "", + "", + "boomla.net", + "", + "", + "", + "", + "", + "", + "", + "", + "drud.io", + "", + "", + "", + "", + "", + "", + "", + "homelinux.com", + "", + "futuremailing.at", + "", + "", + "", + "", + "", + "myspreadshop.be", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "iveland.no", + "", + "", + "", + "", + "", + "ecommerce-shop.pl", + "", + "", + "", + "", + "", + "jele.host", + "", + "lom.it", + "cloudns.asia", + "", + "", + "", + "sor-varanger.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "githubpreview.dev", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "teo.br", + "", + "asso.km", + "gotpantheon.com", + "", + "hb.cn", + "org.pk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "barsy.shop", + "", + "", + "", + "", + "", + "", + "", + "", + "eigersund.no", + "", + "", + "", + "", + "homelinux.net", + "", + "", + "", + "", + "", + "freeddns.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ltd.cy", + "alpha-myqnapcloud.com", + "", + "", + "", + "", + "", + "", + "", + "free.hr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tm.cy", + "", + "bamble.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wixsite.com", + "", + "", + "", + "", + "", + "", + "", + "my-router.de", + "", + "", + "", + "", + "", + "org.py", + "", + "", + "", + "aerodrome.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "championship.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.pe", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dedyn.io", + "", + "", + "", + "", + "zagan.pl", + "", + "", + "", + "", + "", + "", + "mydissent.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sells-for-less.com", + "", + "", + "", + "", + "asso.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "name.ng", + "", + "", + "", + "", + "virtual-user.de", + "", + "", + "", + "", + "", + "", + "", + "kraanghke.no", + "", + "", + "", + "", + "", + "", + "", + "santoandre.br", + "jondal.no", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "org.pt", + "", + "", + "", + "", + "org.to", + "", + "", + "", + "", + "mypep.link", + "", + "", + "", + "", + "", + "", + "name.qa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.tm", + "", + "", + "", + "", + "verbania.it", + "", + "", + "", + "", + "", + "", + "", + "", + "eu.ax", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ok.us", + "", + "", + "", + "balsan-suedtirol.it", + "", + "", + "", + "org.tw", + "", + "", + "", + "ia.us", + "", + "", + "mein-vigor.de", + "", + "", + "", + "", + "", + "frenchkiss.jp", + "", + "", + "", + "", + "", + "", + "schulserver.de", + "", + "", + "", + "xy.ax", + "", + "", + "", + "", + "", + "tur.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trd.br", + "", + "friuliveneziagiulia.it", + "", + "", + "", + "drud.us", + "", + "", + "", + "", + "", + "", + "", + "tv.tr", + "org.br", + "", + "budejju.no", + "", + "", + "", + "", + "", + "", + "azurewebsites.net", + "", + "", + "kvinnherad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gwangju.kr", + "", + "in.rs", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ir", + "", + "", + "", + "", + "", + "", + "", + "dnsdojo.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "klodzko.pl", + "", + "valledaosta.it", + "", + "", + "", + "", + "", + "", + "gov.sx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.ng", + "", + "", + "", + "", + "h\303\270yanger.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mjondalen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "int.ci", + "", + "", + "", + "in.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sopot.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asso.ci", + "", + "\303\245snes.no", + "", + "", + "", + "", + "", + "", + "123paginaweb.pt", + "", + "", + "", + "", + "\347\266\262\350\267\257.tw", + "", + "", + "", + "", + "", + "troms\303\270.no", + "gov.cx", + "", + "", + "", + "ninhthuan.vn", + "org.tt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "namaste.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "diskstation.eu", + "vindafjord.no", + "", + "", + "oya.to", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mordovia.su", + "upli.io", + "", + "", + "gjovik.no", + "", + "", + "", + "", + "", + "int.is", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "d.bg", + "id.ir", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "davvesiida.no", + "", + "", + "", + "", + "org.ye", + "org.lk", + "", + "it.com", + "or.us", + "", + "", + "", + "web.tj", "salat.no", + "e12.ve", + "", + "", + "", + "", + "moo.jp", "", "", "", @@ -29979,15 +29421,6 @@ const 
char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "operaunite.com", - "", - "", - "", - "", - "", - "", - "", - "puglia.it", "", "", "", @@ -29997,44 +29430,1852 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "algard.no", "", "", "", "", "", - "goip.de", "", "", "", - "fortal.br", "", "", "", - "web.nf", + "myspreadshop.fr", + "", + "", + "", + "na4u.ru", + "", + "", + "", + "", + "", + "", + "", + "", + "org.au", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hacca.jp", + "", + "", + "", + "", + "", + "", + "", + "agro.pl", + "", + "", + "", + "", + "", + "org.yt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.am", + "", + "store.nf", + "", + "", + "org.ly", + "", + "", + "agents.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.pr", + "wedeploy.sh", + "", + "myftp.biz", + "boleslawiec.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "freedesktop.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "shopware.store", + "cesenaforli.it", + "", + "dnsdojo.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "flier.jp", + "", + "", + "", + "media.pl", + "", + "", + "", + "", + "", + "khmelnytskyi.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wroc.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "swinoujscie.pl", + "il.us", + "", + "", + "", + "", + "", + "m.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "valle.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "firm.ng", + "", + "idv.hk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ltd.lk", + "", + "", + "", + "myamaze.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ae", + "", + "", + "moss.no", + "im.it", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.ca", + "", + "", + "", + "", + "couchpotatofries.org", + "", + "", + "org.sv", + "", + "", + "", + "", + "org.mv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "karasjohka.no", + "", + "tm.se", + "ong.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asso.mc", + "", + "", + "", + "", + "", + "minisite.ms", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "be.ax", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "khakassia.su", + "", + "", + "", + "", + "mb.it", + "", + "", + "binhthuan.vn", + "", + "andebu.no", + "stargard.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "club.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "org.tr", + "", + "", + "", + "", + "org.cv", + "", + "", + "", + "tm.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.ba", + "", + "", + 
"", + "or.bi", + "", + "", + "", + "", + "", + "", + "sorocaba.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.ni", + "", + "", + "", + "for-some.biz", + "", + "", + "", + "", + "", + "", + "avoues.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "democracia.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "myiphost.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "synology-ds.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "static-access.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sakura.tv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dnshome.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tec.br", + "hamar.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blog.br", + "", + "", + "", + "", + "blog.bo", + "", + "", + "", + "", + "", + "", + "", + "webspace.rocks", + "", + "", + "", + "", + "", + "azerbaijan.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tm.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "os\303\270yro.no", + "", + "agro.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tysv\303\246r.no", + "monzaebrianza.it", + "", + "", + "", + "", + "", + "", + "mantova.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "filegear-sg.me", + "", + "", + "", + "", + "", + "", + "", + "", + "gjerdrum.no", + "", + "", + "", + "", + "", + "sinaapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.na", + "org.ee", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.lr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wafflecell.com", + "akamai.net", + "", + "", + "", + "", + "", + "", + "", + "video.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "univ.bj", + "", + "", + "", + "", + "", + "", + "", + "blogsite.xyz", + "", + "", + "", + "", + "tm.mc", + "", + "", + "", + "shop.ro", + "", + "", + "news.hu", + "", + "", + "", + "", + "baidar.no", + "", + "", + "", + "", + "", + "", + "", + "", + "org.et", + "", + "", + "", + "", + "", + "", + "", + "", + "hopto.me", + "", + "", + "", + "", + "", + "mil.gh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "meloy.no", + "", + "", + "", + "", + "org.gu", + "", + "", + "wroclaw.pl", + "", + "", + "haram.no", + "", + "", + "", + "", + "", + "", + "", + "", + "moskenes.no", + "", + "", + "", + "", + "", + "", + "myspreadshop.nl", + "tos.it", + "", + "", + "", + "edgekey-staging.net", + "", + "", + "", + "", + "vald-aosta.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tur.ar", + "", + "v\303\246r\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"org.ar", + "", + "", + "myds.me", + "", + "", + "", + "", + "", + "", + "\347\256\207\344\272\272.hk", + "", + "", + "", + "tv.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vevelstad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "org.sn", + "", + "", + "forl\303\254-cesena.it", + "", + "org.mn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.bn", + "", + "", + "", + "", + "org.kn", + "", + "zakopane.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ebiz.tw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.in", + "", + "", + "", + "", + "", + "", + "", + "", + "firm.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "inf.cu", + "inf.mk", + "", + "", + "", + "nobushi.jp", + "", + "", + "", + "", + "cesenaforl\303\254.it", + "", + "", + "", + "", + "", + "", + "", + "", + "io.kg", + "", + "", + "", + "", + "", + "org.gy", + "", + "", + "", + "od.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "firm.nf", + "", + "", + "", + "", + "", + "org.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ge", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hotel.tz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mediatech.dev", + "", + "", + "", + "", + "", + "", + "dyn-o-saur.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "io.vn", + "", + "", + "", + "", + "modalen.no", + "", + "", + "", + "", + "id.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sells-for-u.com", + "", + "i.ng", + "", + "", + "", + "", + "", + "", + "org.gt", + "", + "", + "vadso.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "beskidy.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "draydns.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stuff-4-sale.us", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.mz", + "", + "", + "", + "", + "cnpy.gdn", + "for-the.biz", + "", + "", + "", + "bulsan-sudtirol.it", + "", + "", + "", + "", + "mil.kz", + "", + "", + "", + "base.ec", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aejrie.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nord-aurdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "nz.basketball", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.sc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tm.km", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fusa.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chungbuk.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "store.bb", + "homelinux.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "on.ca", + "org.lv", + "", + "", + "", + "", + 
"org.pn", + "dray-dns.de", + "", + "upper.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "asso.nc", + "", + "naples.it", + "", + "", + "", + "", + "conf.au", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tc.br", + "", + "", + "", + "", + "", + "", + "", + "bronnoysund.no", + "", + "shiftcrypto.dev", + "int.la", + "stufftoread.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "katowice.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "unicloud.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "masfjorden.no", + "", + "", + "", + "", + "", + "", + "", + "mil.tj", + "", + "", + "", + "", + "log.br", + "", + "medecin.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fakefur.jp", + "", + "kaluga.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "verse.jp", + "", + "", + "", + "", + "donetsk.ua", + "", + "", + "hopto.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "freeddns.us", + "", + "", + "", + "", + "", + "", + "buyshouses.net", + "", + "vardo.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fuel.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "novara.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.ug", + "leg.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hicam.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "daknong.vn", + "", + "", + "", + "", + "", + "supabase.in", + "", + "", + "", + "", + "mil.iq", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.fi", + "", + "", + "", + "ddns.me", + "", + "broker.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "gotdns.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.zm", + "", + "", + "", + "", + "hostyhosting.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "worse-than.tv", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.zw", + "", + "org.tn", + "", + "", + "", + "", + "org.gr", + "", "", "", "arezzo.it", "", "", + "akamaiedge-staging.net", "", "", "", "", "", - "moscow.museum", "", - "wpcomstaging.com", "", "", "", "", + "miniserver.com", "", "", - "ashgabad.su", "", - "sicilia.it", - "from-me.org", "", "", "", @@ -30046,12 +31287,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dsmynas.com", + "es.ax", "", "", "", "", + "fool.jp", "", - "coop.km", "", "", "", @@ -30061,11 +31304,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.jo", + "fauske.no", "", "", "", "", "", + "vpnplus.to", "", "", "", @@ -30076,7 +31322,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sex.hu", "", "", "", @@ -30088,8 +31333,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "uber.space", "", - "museum.om", "", "", "", @@ -30099,6 +31344,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "inf.br", 
"", "", "", @@ -30109,6 +31355,896 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "inc.hk", + "", + "", + "", + "", + "", + "", + "", + "us.ax", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "here-for-more.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "alvdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.ke", + "cat.ax", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "medecin.km", + "", + "", + "", + "mx.na", + "", + "", + "", + "virtualserver.io", + "", + "", + "", + "", + "", + "", + "", + "h\303\244kkinen.fi", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bergen.no", + "", + "", + "", + "", + "", + "", + "", + "dnsiskinky.com", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.ch", + "", + "ham-radio-op.net", + "", + "cutegirl.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "friulivegiulia.it", + "", + "", + "", + "", + "", + "mil.tz", + "", + "", + "", + "", + "endofinternet.org", + "dynalias.com", + "", + "", + "", + "org.je", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "me.tz", + "", + "", + "dreamhosters.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bindal.no", + "", + "", + "", + "", + "", + "", + "", + "swidnik.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-leet.com", + "", + "", + "", + "", + "web.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "611.to", + "", + "", + "", + "", + "bookonline.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dynalias.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "s\303\241lat.no", + "akamai-staging.net", + "", + "", + "", + "", + "i.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dsmynas.net", + "", + "", + "", + "", + "", + "gg.ax", + "", + "", + "", + "", + "", + "macerata.it", + "", + "vaapste.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tsk.tr", + "", + "mie.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.sg", + "", + "", + "", + "", + "org.mg", + "", + "azurestaticapps.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.kg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ug", + "", + "", + "", + "", + "terni.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hammarfeasta.no", + "", + "", + "", + "", + "", + "", + "", + "", + "name.fj", + "v.bg", + "", + "", + "", + "iz.hr", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "forgeblocks.com", + "", + "", + "", + "", + "", + "", + "", + "schulplattform.de", + "", + "", + "square7.ch", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kikirara.jp", + "", + "", + "", + "masoy.no", + "", + "", + "", + "", + "inder\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "noticeable.news", + "", + "", + "", + "", + "tn.us", + "", + "", + "", + "", + "", + "", + "", + "mircloud.ru", + "", + "instantcloud.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ing.pa", + "guam.gu", + "", + "", + "", + "", + "", + "", + "voss.no", + "", + "", + "", + "veg\303\245rshei.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ath.cx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "averoy.no", + "org.lc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fldrv.com", + "", + "", + "is.it", + "", + "", + "", + "", + "shiga.jp", + "", + "omg.lol", + "", + "hagiang.vn", + "", + "", + "", + "", + "", + "", + "", + "aseral.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vb.it", + "", + "", + "hobby-site.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "herokuapp.com", + "", + "", + "valleaosta.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.az", + "", + "", + "", + "mysecuritycamera.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ts.net", + "kill.jp", + "", + "", + "", + "tonsberg.no", + "", + "", + "", + "", + "", + "", + "workinggroup.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "deporte.bo", + "", + "", + "", + "", + "", + "", + "", + "org.ac", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "skygearapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "misconfused.org", + "", + "", + "", + "mypsx.net", + "", + "", + "", + "", + "", + "", + "", + "", + "varoy.no", + "law.pro", + "", + "", + "mysecuritycamera.net", + "", + "", + "", + "", + "", + "", "", "", "", @@ -30148,10 +32284,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mydrobo.com", "", + "or.pw", "", "", "", + "nghean.vn", "", "", "", @@ -30160,7 +32299,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mein-iserv.de", "", "", "", @@ -30179,7 +32317,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "furniture.museum", "", "", "", @@ -30195,10 +32332,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sarpsborg.no", + "foggia.it", + "haugiang.vn", "", - "servepics.com", "", "", + "tv.na", "", "", "", @@ -30213,14 +32353,9 
@@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "salerno.it", - "vevelstad.no", - "altoadige.it", - "go-vip.net", "", "", "", - "servep2p.com", "", "", "", @@ -30232,6 +32367,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "uh-oh.jp", + "ot.it", "", "", "", @@ -30250,12 +32387,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "donostia.museum", "", + "sub.jp", "", "", "", - "cloud66.ws", "", "", "", @@ -30267,15 +32403,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "kh.ua", "", "", "", "", "", "", - "mil.iq", "", + "nohost.me", "", "", "", @@ -30286,43 +32421,52 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "web.tj", "", "", "", "", + "glitch.me", + "name.my", "", "", "", "", "", + "hra.health", "", "", "", "", "", + "streamlit.app", "", "", "", "", "", + "akamaized.net", + "frosta.no", "", + "hlx.live", "", "", + "edgesuite-staging.net", "", "", - "usculture.museum", "", "", "", "", "", "", + "spjelkavik.no", + "umbria.it", "", - "name.mv", "", + "te.ua", "", "", + "s\303\270rum.no", "", "", "", @@ -30336,15 +32480,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dynalias.org", "", "", - "med.ec", - "hurum.no", "", + "lel.br", + "vps-host.net", "", "", "", "", + "fr\303\270ya.no", "", "", "", @@ -30355,35 +32501,33 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sx.cn", - "preservation.museum", "", "", "", + "org.fm", "", "", "", "", "", - "capebreton.museum", "", - "org.sh", - "leg.br", "", + "webhosting.be", "", "", "", + "diskstation.org", "", "", "", "", "", + "appspaceusercontent.com", "", "", "", + "fh-muenster.io", "", - "org.bh", - "12hp.at", "", "", "", @@ -30394,16 +32538,1054 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cuiaba.br", "", + "org.bi", "", "", + "shiftcrypto.io", + "", + "org.ki", + "", + "", + "", + "", + "prd.km", + "", + "ddns.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bozen-s\303\274dtirol.it", + "123homepage.it", + "", + "", + "", + "", + "tm.za", + "", + "", + "", + "", + "", + "", + "in-butter.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lind\303\245s.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ftpaccess.cc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ec", + "", + "", + "", + "", + "", + "", + "", + "evje-og-hornnes.no", + "", + "org.gn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ci", + "", + "", + "", + "", + "", + "", + "", + "lig.it", + "or.it", + "", + "", + "", + "", + "maringa.br", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.ie", + "", + "fitjar.no", + "tel.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myforum.community", + "", + "", + "", + "", + "", + "", + "", + "cieszyn.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myspreadshop.it", + "", + "", + "", + "", + "gotdns.com", + "", + "", + "", + "", + "", + "", + "andria-trani-barletta.it", + "org.ss", + "", + "", + "", + "", + 
"org.ms", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.bs", + "", + "", + "", + "name.pm", + "", + "", + "", + "hagebostad.no", + "asso.gp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jellybean.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.is", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "huissier-justice.fr", + "", + "", + "", + "", + "", + "", + "tm.mg", + "", + "", + "", + "", + "aquila.it", + "", + "", + "", + "", + "\345\276\263\345\263\266.jp", + "", + "id.au", + "", + "", + "firewalledreplit.co", + "pro.cy", + "", + "", + "", + "\345\263\266\346\240\271.jp", + "", + "", + "", + "", + "\345\261\261\345\275\242.jp", + "org.ws", + "", + "", + "", + "\345\261\261\346\242\250.jp", + "", + "", + "", + "", + "\345\261\261\345\217\243.jp", + "", + "nyaa.am", + "", + "", + "\345\256\256\345\264\216.jp", + "isa.us", + "", + "", + "", + "\345\262\241\345\261\261.jp", + "", + "", + "\330\247\333\214\330\261\330\247\331\206.ir", + "", + "\351\263\245\345\217\226.jp", + "", + "", + "", + "khplay.nl", + "\345\257\214\345\261\261.jp", + "", + "", + "", + "", + "\347\276\244\351\246\254.jp", + "", + "", + "", + "", + "\345\256\256\345\237\216.jp", + "", + "", + "", + "", + "\347\246\217\344\272\225.jp", + "", + "", + "", + "", + "\347\246\217\345\263\266.jp", + "", + "", + "", + "", + "\346\235\261\344\272\254.jp", + "", + "", + "", + "", + "\351\225\267\345\264\216.jp", + "org.rs", + "", + "", + "", + "\346\226\260\346\275\237.jp", + "backplaneapp.io", + "blogspot.jp", + "vix.br", + "", + "\345\244\247\351\230\252.jp", + "", + "", + "or.jp", + "", + "\347\246\217\345\262\241.jp", + "", + "", + "", + "", + "cooperativa.bo", + "", + "", + "", + "", + "\345\262\220\351\230\234.jp", + "", + "", + "", + "", + "\344\272\254\351\203\275.jp", + "", + "", + "", + "", + "\345\205\265\345\272\253.jp", + "", + "", + "", + "", + "\351\246\231\345\267\235.jp", + "", + "", + "", + "hosp.uk", + "to.gt", + "", + "", + "", + "skanit.no", + "\347\237\263\345\267\235.jp", + "", + "", + "", + "", + "\351\235\231\345\262\241.jp", + "", + "", + "", + "nombre.bo", + "\351\253\230\347\237\245.jp", + "", + "", + "", + "", + "\351\235\222\346\243\256.jp", + "", + "", + "", + "", + "\350\214\250\345\237\216.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "valle-aosta.it", + "", + "", + "", + "money.bj", + "", + "", + "", + "", + "", + "doomdns.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "urbinopesaro.it", + "", + "", + "", + "", + "", + "", + "", + "the.br", + "jele.site", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "undo.jp", + "", + "", + "", + "", + "\345\237\274\347\216\211.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "capoo.jp", + "flap.id", + "", + "", + "", + "est-le-patron.com", + "", + "", + "", + "", + "", + "", + "", + "lt.ua", + "org.ag", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "author.aero", + "", + "", + "\351\225\267\351\207\216.jp", + "", + "", + "", + "", + "", + 
"", + "gotdns.ch", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mykolaiv.ua", + "massacarrara.it", + "", + "", + "", + "taa.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "catanzaro.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tt.im", + "org.ps", + "", + "mil.ve", + "", + "", + "", + "nysa.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ufcfan.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "homesklep.pl", + "", + "", + "", + "org.sa", + "", + "", + "", + "", + "org.ma", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ba", + "", + "", + "", + "", + "", + "", + "", + "", + "\344\270\211\351\207\215.jp", + "", + "", + "", + "", + "jcloud.kz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ua", + "", + "", + "", + "oh.us", + "", + "", + "dynu.net", + "", + "", + "", + "", + "", + "", + "groks-the.info", + "", + "", + "", + "", + "", + "", + "mazeplay.com", + "fastly-edge.com", + "", + "", + "", + "", + "", + "", + "", + "balsan-sudtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "h\303\270ylandet.no", + "", + "", + "", + "", + "", + "", + "", + "lenug.su", + "", + "", + "", + "", + "clerkstage.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vaporcloud.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chieti.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sula.no", + "", + "", + "", + "", + "sola.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wloclawek.pl", + "", + "", + "", + "", + "homebuilt.aero", + "", + "jaworzno.pl", + "pro.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "clan.rip", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dongnai.vn", + "", + "", + "", + "", + "tm.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jele.club", + "", + "org.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rec.co", + "", + "", + "", + "", + "sellfy.store", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fnwk.site", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rdv.to", + "utsira.no", + "", + "", + "", + "", + "", + "", + "", + "", + "rec.ro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ltd.ua", + "", "", "", "", "", "", - "pvh.br", "", "", "", @@ -30417,8 +33599,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "assn.lk", + "skodje.no", "", + "stokke.no", + "pmn.it", "", "", "", @@ -30433,11 +33617,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hu.net", "", "", "", - "ddnsfree.com", "", "", "", @@ -30448,23 +33630,21 @@ const 
char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "cosenza.it", "", "", "", "", "", + "org.hk", "", "", - "divttasvuotna.no", "", "", - "ullensaker.no", "", - "bplaced.com", "", "", "", + "pro.tt", "", "", "", @@ -30472,13 +33652,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "aver\303\270y.no", "", "", "", "", "", "", + "suwalki.pl", "", "", "", @@ -30491,29 +33671,33 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ullensvang.no", + "dynamisches-dns.de", "", "", "", "", "", "", - "mil.ar", "", + "red.sv", "", "", "", + "web.app", "", "", "", "", "", + "from-az.net", + "bolt.hu", "", "", "", "", "", "", + "dongthap.vn", "", "", "", @@ -30530,7 +33714,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "myasustor.com", + "org.pa", "", "", "", @@ -30547,12 +33731,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "int.tj", "", "", "", "", "", + "of.je", "", "", "", @@ -30564,12 +33748,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "berg.no", "", - "monmouth.museum", "", + "dnsalias.com", "", "", + "lv.ua", "", "", "", @@ -30577,7 +33761,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sologne.museum", + "imb.br", "", "", "", @@ -30588,35 +33772,34 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "vinhlong.vn", "", "", "", "", "", - "edu.za", "", "", "", + "development.run", "", "", "", "", "", - "rade.no", "", "", "", + "herad.no", "", "", "", "", - "s\303\270gne.no", "", "", "", "", "", - "fylkesbibl.no", "", "", "", @@ -30631,61 +33814,67 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "encr.app", "", "", "", "", "", - "murmansk.su", "", "", "", "", + "trogstad.no", "", "", "", "", "", "", + "sn\303\245sa.no", "", "", "", - "afjord.no", "", "", "", "", + "tas.au", "", "", "", + "tv.im", "", + "dnsalias.net", + "sm\303\270la.no", "", "", "", "", + "press.se", "", "", "", "", "", - "cooperativa.bo", - "sund.no", "", "", + "org.ht", "", "", "", "", "", + "ketrzyn.pl", "", "", "", "", "", "", - "pro.ht", "", "", + "org.ai", "", "", "", @@ -30694,766 +33883,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "press.museum", "", "", "", "", "", "", + "pro.pr", "", "", "", + "mysecuritycamera.org", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "divtasvuodna.no", - "", - "", - "virtualserver.io", - "blogspot.sg", - "", - "", - "", - "", - "", - "", - "", - "", - "plo.ps", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "servebbs.org", - "mil.mv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nom.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "monza.it", - "", - "org.al", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-leet.com", - 
"", - "", - "", - "", - "", - "", - "mulhouse.museum", - "", - "", - "kutno.pl", - "readthedocs.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudns.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lig.it", - "", - "", - "", - "", - "", - "ng.city", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "daplie.me", - "", - "", - "louvre.museum", - "", - "", - "madrid.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "sor-varanger.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "iglesiascarbonia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.ph", - "", - "firm.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aero.mv", - "", - "", - "", - "", - "", - "", - "vladimir.su", - "", - "", - "insurance.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fjaler.no", - "", - "", - "", - "", - "", - "", - "org.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firm.dk", - "", - "", - "", - "", - "", - "", - "", - "bplaced.de", - "", - "", - "", - "", - "", - "", - "pp.ru", - "med.pl", - "", - "michigan.museum", - "dallas.museum", - "", - "", - "", - "", - "accident-prevention.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pro.ec", - "", - "", - "", - "", - "", - "", - "", - "", - "gyeonggi.kr", - "", - "", - "", - "", - "", - "", - "prato.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mosj\303\270en.no", - "", - "mil.ba", - "", - "in-butter.de", - "", - "", - "", - "", - "", - "", - "hi.us", - "", - "", - "vald-aosta.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lel.br", - "", - "", - "", - "", - "", - "reggioemilia.it", - "", - "decorativearts.museum", - "", - "", - "9guacu.br", - "", - "", - "", - "", - "", - "", - "", - "broke-it.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "valledaosta.it", - "", - "", - "", - "", - "", - "kragero.no", - "", - "blogspot.nl", - "", - "balashov.su", - "", - "", - "", - "", - "", - "", - "", - "sch.ng", - "", - "", - "", - "", - "vefsn.no", - "", - "pc.pl", - "", - "", - "", - "", - "", - "safety.aero", - "", - "", - "", - "", - "", - "", - "museum.mv", - "", - "bplaced.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.jo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "selfip.info", - "", - "", - "", - "kr\303\270dsherad.no", - "", - "", - "", - "myactivedirectory.com", - "", - "", - "", - "", - "", - "", - "", - "applinzi.com", - "", - "", - "molde.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "name.na", - "", - "", - "", - "net.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "kvafjord.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "serveblog.net", - "santabarbara.museum", - "", - "", - "static.land", - "", - "", - "solund.no", - "", - "", - "skjerv\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "salvador.br", - "", - "", - "", - "", - "", - "org.gh", - "", - "", - "", - "", - "cloudns.club", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "zone.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lenug.su", - "", - "", - "", - "", - "", - "", - "mo.it", - "", - "kafjord.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "shop.ht", + "fastvps.site", + "org.ls", "", "", "", @@ -31462,40 +33908,40 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jogasz.hu", "", "", "", "", - "\303\270stre-toten.no", "", "", "", "", "", "", + "namsos.no", + "monza-brianza.it", + "shop.th", "", "", "", "", - "gov.sx", "", "", "", "", + "kirara.st", + "ltd.hk", "", - "ybo.faith", "", "", - "north-kazakhstan.su", "", "", "", "", "", "", - "v\303\245g\303\245.no", "", + "tr.no", "", "", "", @@ -31506,14 +33952,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mex.com", + "if.ua", + "tayninh.vn", "", "", - "ibestad.no", - "ostroda.pl", "", "", "", - "ilovecollege.info", "", "", "", @@ -31524,9 +33970,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "flor\303\270.no", "", "", "", + "org.gg", "", "", "", @@ -31535,7 +33983,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "volkenkunde.museum", + "flekkefjord.no", "", "", "", @@ -31550,19 +33998,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "to.it", "", "", "", "", - "abkhazia.su", "", "", "", "", + "org.om", "", "", + "karmoy.no", "", - "podzone.net", "", "", "", @@ -31572,25 +34021,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ivanovo.su", "", + "og.it", "", "", "", + "kartuzy.pl", "", + "org.do", "", "", "", "", "", "", - "krager\303\270.no", "", - "savona.it", "", "", "", "", "", + "nordre-land.no", "", "", "", @@ -31602,19 +34054,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pointto.us", "", + "knightpoint.systems", "", "", "", + "org.dm", "", "", "", + "barsy.club", "", "", "", "", "", + "akadns.net", "", "", "", @@ -31629,6 +34084,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ru.net", "", "", "", @@ -31637,6 +34093,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "fhsk.se", "", "", "", @@ -31647,39 +34104,35 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "protonet.io", "", - "bodo.no", "", + "bounty-full.com", "", "", "", - "kl\303\246bu.no", "", "", "", "", + "pro.mv", "", "", "", - "me.it", "", "", "", - "xnbay.com", "", "", + "te.it", "", "", "", "", "", - "no-ip.biz", "", "", "", "", - "nctu.me", "", "", "", @@ -31700,12 +34153,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "czeladz.pl", "", "", - "gov.cx", "", "", + "o.se", "", "", "", @@ -31714,12 
+34166,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hemne.no", "", "", "", "", - "friulivgiulia.it", "", "", "", @@ -31727,7 +34177,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "og.ao", "", "", "", @@ -31738,16 +34187,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mil.lv", + "hepforge.org", "", "", "", "", "", "", - "catania.it", "", - "mt.it", "", "", "", @@ -31756,12 +34203,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sarpsborg.no", "", "", "", "", "", + "of.by", "", "", "", @@ -31770,6 +34217,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "seidat.net", "", "", "", @@ -31781,11 +34229,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "frankfurt.museum", "", "", "", "", + "in.th", "", "", "", @@ -31794,16 +34242,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "riobranco.br", "", "", "", "", "", + "pug.it", "", "", "", - "mil.do", "", "", "", @@ -31811,28 +34258,172 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "serveminecraft.net", "", "", "", - "wloclawek.pl", "", "", + "cloudflare-ipfs.com", "", - "diskstation.eu", "", - "design.museum", "", "", - "wpdevcloud.com", "", "", - "pu.it", "", - "selfip.net", "", "", - "po.it", + "", + "vanylven.no", + "", + "", + "daa.jp", + "", + "", + "", + "torsken.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hole.no", + "", + "", + "", + "", + "", + "storebase.store", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "browsersafetymark.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rec.br", + "", + "", + "", + "", + "travinh.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "iki.fi", + "", + "lom.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vefsn.no", + "", + "", + "", + "", + "", + "", + "", + "", + "shimane.jp", + "", + "in-berlin.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.la", + "", + "", + "", + "", + "", + "", + "", + "", + "messwithdns.com", + "", + "", + "", + "", + "", "", "", "", @@ -31840,9 +34431,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "diskstation.me", + "zombie.jp", "", "", - "pd.it", "", "", "", @@ -31858,11 +34449,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "verbania.it", "", "", - "sex.pl", "", + "123webseite.at", "", "", "", @@ -31871,18 +34461,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lucca.it", "", + "tr.it", "", "", "", "", "", "", + "eero.online", "", - "project.museum", "", "", "", + "beardu.no", "", "", "", @@ -31891,15 +34484,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.sl", "", "", "", "", + "org.ml", "", "", "", "", - "holt\303\245len.no", "", "", "", @@ -31911,29 +34505,30 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "blog.vu", 
"", "", "", "", "", "", - "org.af", "", "", "", "", - "tm.hu", "", "", + "tv.sd", "", "", "", "", "", "", - "zakopane.pl", "", + "crap.jp", "", + "hyogo.jp", "", "", "", @@ -31944,12 +34539,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mil.fj", "", "", "", "", "", "", + "k12.vi", + "org.il", "", "", "", @@ -31963,10 +34561,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ponpes.id", "", "", "", + "org.es", + "hobol.no", "", "", "", @@ -31977,60 +34576,58 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "snoasa.no", "", "", "", "", - "tm.cy", "", "", "", "", "", + "weblike.jp", + "123website.nl", "", - "conf.lv", "", "", "", "", "", "", + "abruzzo.it", "", - "web.app", "", "", - "donetsk.ua", + "lu.it", "", "", - "pokrovsk.su", "", "", + "lo.it", "", - "schokokeks.net", "", "", + "hattfjelldal.no", "", "", - "lesja.no", "", "", "", "", "", "", + "dnepropetrovsk.ua", "", "", "", "", - "name.qa", "", "", "", "", + "collegefan.org", "", "", - "no-ip.info", "", "", "", @@ -32044,21 +34641,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "stat.no", - "california.museum", - "nuremberg.museum", "", "", "", - "virginia.museum", "", - "pe.it", "", "", "", "", "", "", + "123kotisivu.fi", "", "", "", @@ -32072,22 +34665,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "countryestate.museum", + "bahcavuotna.no", "", "", "", "", - "cloudns.asia", "", "", "", - "misconfused.org", "", "", + "easypanel.app", "", "", "", "", + "dnsalias.org", "", "", "", @@ -32097,7 +34690,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jeju.kr", "", "", "", @@ -32105,8 +34697,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "randaberg.no", "", + "to.md", + "tmp.br", "", "", "", @@ -32122,11 +34715,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "web.za", "", "", "", "", - "freedesktop.org", "", "", "", @@ -32135,15 +34728,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "le.it", "", "", - "pt.it", "", "", - "odessa.ua", "", + "org.gi", "", "", + "deno-staging.dev", "", "", "", @@ -32157,7 +34751,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mp.br", "", "", "", @@ -32169,24 +34762,25 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "verran.no", "", "", "", "", "", - "wodzislaw.pl", "", + "lg.ua", "", + "torproject.net", "", - "asti.it", "", + "lt.it", "", "", "", "", "", "", + "homesecuritypc.com", "", "", "", @@ -32197,7 +34791,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "snaase.no", "", "", "", @@ -32211,15 +34804,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "cloudfunctions.net", "", "", "", + "tv.it", "", "", - "oksnes.no", "", "", - "dyndns.info", "", "", "", @@ -32230,13 +34823,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "rennebu.no", "", "", "", "", "", - "histoire.museum", "", "", "", @@ -32261,10 +34852,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hl.no", "", "", - "cloudns.biz", "", "", 
"", @@ -32272,8 +34861,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ol.no", "", - "no-ip.ca", "", "", "", @@ -32283,15 +34872,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "media.hu", + "pro.in", "", "", "", "", "", "", - "applicationcloud.io", "", + "fosnes.no", "", "", "", @@ -32313,349 +34902,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "teo.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "i.bg", - "", - "", - "", - "", - "", - "valley.museum", - "", - "", - "gjerdrum.no", - "mysecuritycamera.com", - "", - "", - "", - "", - "lt.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "exchange.aero", - "", - "", - "botanical.museum", - "", - "", - "stargard.pl", - "", - "orkdal.no", - "", - "", - "", - "servehalflife.com", - "", - "", - "", - "", - "", - "", - "mn.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "iwate.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "troms\303\270.no", - "", - "", - "", - "netlify.com", - "pr.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kunstsammlung.museum", - "", - "", - "gwangju.kr", - "", - "mi.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sells-it.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mayfirst.info", - "kurgan.su", - "", - "", - "", - "", - "", - "", - "", - "ontario.museum", - "olecko.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.al", - "", - "", - "", - "", - "", - "", - "tm.no", - "", - "", - "", - "", - "l.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.sz", - "", - "", - "", - "", - "org.mz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.bz", - "", - "", - "", - "", - "org.kz", - "karmoy.no", - "", - "", - "", - "", - "", - "dynalias.net", - "", - "", - "", - "", - "", - "", - "", - "", - "cloudaccess.net", - "", - "", - "", - "org.uz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "otago.museum", - "davvesiida.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mysecuritycamera.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -32679,40 +34925,36 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "flog.br", "", "", "", "", "", "", - "vix.br", "", "", "", "", - "bolivia.bo", "", "", - "tur.br", + "tw.cn", "", "", "", "", - "modern.museum", "", "", "", - "\346\240\203\346\234\250.jp", "", "", "", "", "", - "trd.br", "", "", "", - "\345\272\203\345\263\266.jp", + "org.pl", "", "", "", @@ -32720,15 +34962,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "development.run", "", "", "", "", "", "", - "repl.co", - "lom.no", + 
"homeftp.org", "", "", "", @@ -32738,17 +34978,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "qbuser.com", "", - "karacol.su", "", "", "", "", "", - "draydns.de", - "isernia.it", "", "", + "noop.app", "", "", "", @@ -32763,7 +35002,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hotel.hu", "", "", "", @@ -32773,17 +35011,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nhlfan.net", "", "", "", "", - "pn.it", "", "", "", "", "", - "ox.rs", "", "", "", @@ -32792,21 +35029,239 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ltd.gi", "", - "name.vn", "", "", "", "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "agro.bj", + "", + "", + "daklak.vn", + "", + "", + "lecce.it", + "", + "", + "rio.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nflfan.org", + "", + "", + "szczytno.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "crafting.xyz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ancona.it", + "", + "", + "", + "", + "", + "", + "", + "tn.it", + "", + "", + "", + "", + "", + "org.nr", + "", + "", + "", + "", + "", + "", + "stange.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "simple-url.com", + "", + "", + "", + "", + "int.ni", + "", + "", + "", + "", + "fedorainfracloud.org", + "", + "", + "", + "chirurgiens-dentistes.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chirurgiens-dentistes-en-france.fr", + "", + "", + "", + "", + "", + "scrapper-site.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "home-webserver.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vestre-slidre.no", + "", + "press.aero", + "", + "", + "", + "", "boldlygoingnowhere.org", "", "", "", + "dev-myqnapcloud.com", "", "", "", + "fukushima.jp", "", "", "", @@ -32822,12 +35277,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.tz", "", "", "", "", - "co.nz", "", "", "", @@ -32837,9 +35290,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "co.cz", "", "", + "myphotos.cc", "", "", "", @@ -32847,12 +35300,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pi.it", "", "", "", "", "", + "wphostedmail.com", "", "", "", @@ -32860,6 +35313,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "friuliv-giulia.it", "", "", "", @@ -32883,11 +35337,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "backplaneapp.io", "", "", "", - "\345\215\203\350\221\211.jp", "", "", "", @@ -32897,19 +35349,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "webhop.org", "", "", "", - "cyon.site", - "\347\273\204\347\271\224.hk", "", "", "", "", - "\347\265\204\347\271\224.hk", "", - "blogspot.cl", "", "", "", @@ -32917,7 
+35364,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "e4.cz", "", "", "", @@ -32925,25 +35371,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "myspreadshop.es", "", "", "", "", "", - "larsson.museum", "", "", "", "", "", "", - "\346\225\231\350\202\262.hk", "", "", "", "", "", - "steiermark.museum", "", "", "", @@ -32954,10 +35398,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "versailles.museum", "", "", - "pl.ua", "", "", "", @@ -32972,18 +35414,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "\346\225\216\350\202\262.hk", "", "", "", "", "", "", - "square7.ch", "", + "sicily.it", "", "", "", + "candypop.jp", "", "", "", @@ -33000,359 +35442,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firm.co", - "", - "", - "", - "", - "jan-mayen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mordovia.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "parma.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oz.au", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "osoyro.no", - "", - "", - "", - "", - "", - "org.vu", - "", - "tr.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hotel.lk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.sz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tysv\303\246r.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bahcavuotna.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.qa", - "", - "", - "", - "", - "", - "conf.se", - "", - "", - "", - "", - "", - "", - "", - "", - "britishcolumbia.museum", - "ribeirao.br", - "", - "", - "", - "", - "", - "mosvik.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nedre-eiker.no", - "", - "", - "", - "bpl.biz", - "", - "", - "blogspot.be", - "", - "", - "", - "", - "schoenbrunn.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "couchpotatofries.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "memset.net", - "", - "", - "", - "", - "", - "", - "", - "", - "l\303\244ns.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "wroc.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "acct.pro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ne.tz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "iwi.nz", - "", - "", - "", - "", - "pp.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "onred.one", "vossevangen.no", - "ovre-eiker.no", "", "", "", @@ -33362,22 +35452,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "database.museum", "", - 
"fl\303\245.no", "", "", "", - "firm.in", "", "", "", "", "", - "no-ip.org", "", "", "", + "pvt.ge", "", "", "", @@ -33389,6 +35476,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "webthings.io", "", "", "", @@ -33397,6 +35485,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hoyanger.no", "", "", "", @@ -33410,6 +35499,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "r2.dev", "", "", "", @@ -33420,22 +35510,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "in-vpn.de", "", "", "", "", + "spydeberg.no", "", - "h.se", "", "", - "matera.it", "", "", "", "", "", - "recife.br", "", "", "", @@ -33447,6 +35534,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "poa.br", "", "", "", @@ -33458,17 +35546,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "stuff-4-sale.org", "", "", "", "", "", "", + "gifu.jp", "", "", "", + "blogspot.bj", "", - "biz.bb", + "org.qa", "", "", "", @@ -33478,6 +35569,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "inf.ua", "", "", "", @@ -33486,16 +35578,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "jobs.tt", "", "", "", + "szczecin.pl", "", "", - "ms.it", "", "", "", + "mo-i-rana.no", "", "", "", @@ -33503,10 +35595,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dedyn.io", "", "", "", + "pr.us", "", "", "", @@ -33527,10 +35619,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "d\303\270nna.no", "", "", "", + "skj\303\245k.no", "", "", "", @@ -33539,6 +35631,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tv.bo", "", "", "", @@ -33548,22 +35641,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "farmequipment.museum", "", "", + "tv.kg", "", "", + "mil.vc", "", - "org.ve", "", "", "", + "srht.site", "", "", "", "", "", - "gov.za", "", "", "", @@ -33580,379 +35673,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "messina.it", - "", - "hs.kr", - "", - "", - "", - "", - "", - "", - "", - "hokksund.no", - "", - "", - "", - "", - "padua.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kvinesdal.no", - "", - "", - "", - "h\303\245.no", - "koeln.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "delaware.museum", - "", - "muosat.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hitra.no", - "", - "torsken.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sinaapp.com", - "", - "", - "", - "", - "", - "deatnu.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "12hp.de", - "", - "go-vip.co", - "", - "", - "", - "", - "", - "", - "", - "", - "dynalias.com", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "sorocaba.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "benevento.it", - "co.mz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "go.tz", - "", - "", - "chambagri.fr", - "", - "", - "", - "", - "", - "", - "", - "vagsoy.no", - "", - "", - "kherson.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "meraker.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tv.tr", - "", - "nome.pt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nis.za", - "", - "", - "tos.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "imperia.it", - "", - "", - "", - "", - "", - "pv.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "scientist.aero", - "", - "", - "", - "", - "", - "", - "", - "in-berlin.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "medecin.km", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -33962,11 +35682,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "idf.il", "", + "oster\303\270y.no", "", "", "", "", + "coop.py", "", "", "", @@ -33989,7 +35712,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "blogspot.cf", "", "", "", @@ -34004,8 +35726,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "verdal.no", "", + "web.nf", "", "", "", @@ -34022,15 +35744,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dnsking.ch", "", "", + "urown.cloud", + "vladikavkaz.ru", "", "", "", "", "", "", - "urbinopesaro.it", "", "", "", @@ -34043,12 +35767,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ppg.br", "", "", "", "", "", + "crimea.ua", "", "", "", @@ -34067,7 +35791,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pa.it", "", "", "", @@ -34077,22 +35800,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mc.it", "", + "tm.ro", "", "", "", - "muenchen.museum", "", "", "", "", + "orsta.no", "", - "pro.na", + "mymediapc.net", "", - "handson.museum", "", + "orsites.com", "", + "mj\303\270ndalen.no", "", "", "", @@ -34102,32 +35826,36 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "onavstack.net", "", "", "", "", "", + "vinnytsia.ua", "", + "org.al", "", "", "", "", + "org.hn", "", + "res.aero", "", "", "", "", - "lv.ua", "", "", "", "", "", "", + "jelenia-gora.pl", "", "", "", - "london.museum", "", "", "", @@ -34135,11 +35863,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "re.kr", "", "", + "accident-prevention.aero", "", "", - "marnardal.no", "", "", "", @@ -34155,9 +35884,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "isteingeek.de", "", - "okinawa.jp", "", "", "", @@ -34168,12 +35895,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, 
size_t len) "", "", "", - "reggiocalabria.it", - "ltd.hk", "", "", "", "", + "usercontent.jp", "", "", "", @@ -34184,19 +35910,1980 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "h\303\241mm\303\241rfeasta.no", - "budejju.no", "", - "org.kp", "", "", + "fastvps.host", + "lombardy.it", "", "", "", "", "", + "messerli.app", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aeroclub.aero", + "", + "", + "", + "", + "", + "", + "lc.it", + "", + "", + "", + "", + "", + "", + "", + "", + "t.se", + "", + "", + "", + "", + "", + "pe.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hamaroy.no", + "", + "", + "", + "", + "i.ph", + "", + "", + "", + "", + "", + "io.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "homedns.org", + "", + "", + "", + "wmflabs.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.th", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cranky.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "edgecompute.app", + "", + "", + "", + "mircloud.us", + "", + "pe.ca", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gniezno.pl", + "val-daosta.it", + "", + "swiebodzin.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "maori.nz", + "", + "", + "", + "", + "", + "mircloud.host", + "", + "", + "", + "", + "", + "prd.mg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "daynight.jp", + "parti.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "cloudsite.builders", + "", + "", + "", + "", + "myspreadshop.pl", + "karaganda.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "forumz.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tysfjord.no", + "napoli.it", + "", + "", + "", + "", + "", + "", + "", + "", + "per.sg", + "", + "", + "", + "", + "", + "", + "", + "valdaosta.it", + "discourse.team", + "int.tj", + "", + "", + "", + "", + "", + "", + "agric.za", + "", + "", + "", + "", + "", + "", + "l\303\270dingen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mil.za", + "ts.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "firenze.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "isa-geek.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wielun.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "duckdns.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "benevento.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "isa-geek.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chowder.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ambulance.aero", + "servequake.com", + "", + "", + "", + "", + "httpbin.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "penza.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "shizuoka.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "tr\303\270gstad.no", + "", + "", + "ciao.jp", + "", + "", + "fastly-terrarium.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "l.se", + "ta.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ooguy.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "test-iserv.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "r\303\270mskog.no", + "", + "psc.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ln.cn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lg.jp", + "", + "", + "", + "", + "", + "", + "", + "org.gl", + "", + "dnipropetrovsk.ua", + "outsystemscloud.com", + "", + "", + "", + "", + "beiarn.no", + "", + "", + "", + "", + "", + "", + "", + "plc.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "davvenjarga.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.at", + "", + "", + "", + "saobernardo.br", + "", + "", + "res.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "chicappa.jp", + "", + "", + "", + "", + "", + "", + "east-kazakhstan.su", + "", + "", + "", + "", + "", + "", + "pri.ee", + "", + "", + "", + "", + "", + "f\303\270rde.no", + "", + "", + "", + "scrapping.cc", + "", + "bar0.net", + "", + "", + "", + "", + "", + "", + "myspreadshop.at", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "li.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.sh", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.bh", + "", + "", + "", + "", + "", + "", + "treviso.it", + "", + "", + "mil.nz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vegarshei.no", + "", + "laz.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "storipress.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "emilia-romagna.it", + "", + "", + "123website.ch", + "", + "", + "", + "", + "", + "", + "", + "ote.bj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "aparecida.br", + "", + "", + "", + "", + "", + "", + "", + "", + "pol.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ras.ru", + "friuli-ve-giulia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pro.ec", + "", + "", + "", + "", + "", + "", + "", + "dyr\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "bona.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wodzislaw.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "casino.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "val-d-aosta.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sdscloud.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "health.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "monzaedellabrianza.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vestre-toten.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "diadem.cloud", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wpdevcloud.com", + "dontexist.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "webredirect.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "eating-organic.net", + "", + "homeunix.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ri.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "istmein.de", + "", + "", + "", + "", + "", + "yamagata.jp", + "", + "", + "", + "", + "", + "isa-geek.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "skoczow.pl", + "", + "", + "", + "", + "org.ph", + "", + "", + "", + "", + "", + "express.aero", + "", + "", + "", + "", + "", + "dontexist.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "homeunix.net", + "", + "", + "", + "", + "", + "", + "", + "", + "filegear-gb.me", + "", + "", + "", + "", + "", + "", + "", + "ro.im", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "int.az", + "folkebibl.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "salangen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hlx.page", + "", + "", + "", + "", + "", + "wiki.br", + "", + "", + "", + "", + "wiki.bo", + "", + "", + "", + "algard.no", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "sirdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "africa.com", + "", + "", + "", + "", + "", + "", + "rv.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "psi.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hobby-site.org", + "", + "", + "", + "", + "", + "", + "", + "", + "vang.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "joinville.br", + "", + "", + "filegear-jp.me", + "", + "", + "", + "", + "romskog.no", + "", + "", + "", + "", + "", + "", + "framer.wiki", + "", + "muni.il", + "", + "pleskns.com", + "", + "dynathome.net", + "", + "", + "", + "saves-the-whales.com", + "", + "", + "", + "", + "", + "", + "", + "dattoweb.com", + "", + "fr\303\246na.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "of.no", + "", + "", + "", + "", + "", + "", + "vladikavkaz.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "or.id", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pa.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "base.shop", + "", + "", + "", + "id.lv", + "", + "", + "", + "cloudaccess.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dojin.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pyatigorsk.ru", + "", + "", + "", + "", + "123webseite.de", + "finnoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "city.hu", + "", + "", + "", + "prd.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gob.mx", + "", + "", + "", + "wolomin.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ru.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tm.pl", + "", + "", + "", + "", + "", + "", + "", + "rieti.it", + "", + "", + "nagasaki.jp", + "", + "", + "", + "transporte.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trana.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "internet-dns.de", "org.ng", "", "", @@ -34221,1208 +37908,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "museumcenter.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "media.aero", - "", - "", - "", - "kalmykia.ru", - "", - "mil.no", - "", - "", - "", - "azurewebsites.net", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "friuliv-giulia.it", - "", - "", - "", - "", - "", - "mil.mg", - "", - "", - "sanfrancisco.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.kg", - "", - "", - "", - "kharkov.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "marche.it", - "", - "medecin.fr", - "my.id", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "better-than.tv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bulsan-s\303\274dtirol.it", - "", - "", - "virtueeldomein.nl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "coop.br", - "", - "", - "", - "", - "", - "omasvuotna.no", - "", - "", - "", - "", - "", - "riik.ee", - "", - "name.hr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-wy.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pc.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mordovia.su", - "", - "", - "", - "", - "", - "", - "kviteseid.no", - "", - "", - "", - "southcarolina.museum", - "", - "", - "", - "", - "", - "", - "", - "nativeamerican.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dnsalias.net", - "", - "", - "", - "portal.museum", - "", - "", - "", - "schlesisches.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.ac", - "", - "", - "", - "", - "", - "", - "", - "tm.ro", - "", - "", - "", - "", - "", - "l\303\246rdal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kyoto.jp", - "zoology.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ln.cn", - "", - "buyshouses.net", - "", - "", - "", - "", - "", - "osasco.br", - "", - "", - "", - "", - "press.ma", - "", - "", - "ngo.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ath.cx", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "meeres.museum", - "", - "", - "", - "", - "k12.vi", - "amli.no", - "", - "", - "withyoutube.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "chungbuk.kr", - "", - "", - "", - "", - "", - "", - "", - "tm.mc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pol.ht", - "", - "helsinki.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "davvenjarga.no", - "", - "", - "", - "", - "hoylandet.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "servebeer.com", - "bari.it", - "", - "", - "", - "sciences.museum", - "", - "", - "", - "", - "", - "roma.it", - "valleaosta.it", - "in-vpn.net", - "", - "", - 
"\303\270vre-eiker.no", - "", - "", - "production.aero", - "", - "", - "", - "from-ky.com", - "", - "", - "", - "", - "", - "", - "", - "kvanangen.no", - "", - "", - "", - "", - "s\303\270r-aurdal.no", - "", - "", - "", - "", - "kaluga.su", - "", - "", - "he.cn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "selfip.org", - "", - "medical.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "badajoz.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "club.tw", - "", - "", - "", - "", - "", - "", - "", - "blogsite.xyz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lecco.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "flekkefjord.no", - "cloudfunctions.net", - "", - "", - "", - "", - "", - "on.fashion", - "", - "", - "", - "", - "", - "", - "", - "finn\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "for-some.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "co.uz", - "", - "x443.pw", - "", - "", - "", - "", - "", - "hk.cn", - "", - "", - "", - "", - "veg\303\245rshei.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "porsangu.no", - "", - "", - "", - "", - "", - "", - "", - "", - "mayfirst.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hornindal.no", - "", - "mytuleap.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "karikatur.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "v\303\245gan.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.vn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "moonscale.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "perugia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "krokstadelva.no", - "", - "pordenone.it", - "", - "", - "", - "", - "from.hr", - "", - "", - "columbus.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.ch", - "", - "", - "memorial.museum", - "", - "", - "", - "laz.it", - "", - "", - "", - "", - "dsmynas.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stj\303\270rdalshalsen.no", - "school.museum", - "", - "", - "", - "freight.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "webredirect.org", - "", - "", - "", - "org.vi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "byen.site", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.az", - "", - "", - "moareke.no", - "", - "", - "", - "", - "", - "", - "org.gp", - "", - "", - "", - "", - "", - "parliament.cy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tm.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lur\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "servequake.com", - "", - "", - "arna.no", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "writesthisblog.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "saobernardo.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ralingen.no", - "cloud.goog", - "", - "", - "", - "", - "", - "", - "", - "", - "kharkiv.ua", - "", - "", - "", - "", - "monticello.museum", - "", - "", - "", - "", - "", - "tur.ar", - "presse.ci", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "filegear-gb.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mel\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "selfip.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "alt.za", - "", - "", - "", - "", - "", - "", - "", - "nowaruda.pl", - "", - "", - "", - "diskstation.org", - "", - "ac.tz", - "", - "design.aero", - "", - "", - "ac.nz", "", "", "", @@ -35434,15 +37919,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "drayddns.com", "", "", + "museum.tt", "", "", "", "", + "scientist.aero", "", "", "", "", - "science.museum", "", "", "", @@ -35451,9 +37937,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lesja.no", "", + "plc.ly", "", + "suldal.no", "", + "dscloud.mobi", "", "", "", @@ -35462,12 +37952,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "flight.aero", + "jogasz.hu", + "troitsk.su", "", "", "", "", - "filegear-jp.me", "", "", "", @@ -35479,6 +37969,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "matta-varjjat.no", "", "", "", @@ -35486,12 +37977,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "enna.it", "", "", "", "", - "d.bg", "", "", "", @@ -35501,20 +37990,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "isa-hockeynut.com", - "tm.km", "", "", "", "", - "hn.cn", "", - "dynalias.org", - "hammerfest.no", "", "", "", + "horten.no", "", + "krakow.pl", "", "", "", @@ -35527,12 +38013,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lu.it", "", "", "", "", - "lo.it", "", "", "", @@ -35542,21 +38026,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "prvcy.page", "", "", "", "", "", "", + "dynvpn.de", "", "", "", "", + "gitlab.io", "", - "masoy.no", "", - "intl.tn", "", "", "", @@ -35572,18 +38055,72 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "siljan.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ascoli-piceno.it", + "", + "", + "", + "", + "monza-e-della-brianza.it", + "", + "", + "", + "", "maintenance.aero", "", - "likescandy.com", "", "", - "hi.cn", "", "", "", "", "", - "modalen.no", "", "", "", @@ -35591,13 +38128,14 @@ 
const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "my-gateway.de", "", "", + "fbxos.fr", "", "", + "pvh.br", "", - "eisenbahn.museum", - "tm.fr", "", "", "", @@ -35613,6 +38151,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "homeunix.org", + "gliwice.pl", "", "", "", @@ -35620,14 +38160,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lucca.it", "", "", "", "", "", "", - "pescara.it", "", "", "", @@ -35641,8 +38179,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ltd.ng", "", - "pg.it", "", "", "", @@ -35662,9 +38200,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kilo.jp", "", "", "", + "mangyshlak.su", "", "", "", @@ -35675,31 +38215,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "museumvereniging.museum", - "\345\244\247\345\210\206.jp", "", "", "", "", - "\347\206\212\346\234\254.jp", "", "", - "cyon.link", "", "", + "toscana.it", "", "", "", "", - "kalmykia.su", + "camdvr.org", "", "", + "hokkaido.jp", "", "", + "pro.ht", "", "", "", - "dnsalias.com", "", "", "", @@ -35718,7 +38256,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dyndns.org", "", "", "", @@ -35727,23 +38264,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ac.sz", "", "", "", "", "", - "mil.ec", "", "", "", "", "", + "muosat.no", "", - "geology.museum", "", "", - "scienceandindustry.museum", "", "", "", @@ -35752,8 +38286,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "le.it", - "miners.museum", "", "", "", @@ -35761,1514 +38293,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "maryland.museum", "", "", "", "", "", - "", - "", - "", - "", - "", - "mi.th", - "", - "", - "", - "", - "o.bg", - "", - "", - "", - "hzc.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firm.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pistoia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lt.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lg.ua", - "", - "kv\303\246fjord.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "emilia-romagna.it", - "", - "", - "", - "dsmynas.net", - "", - "", - "railway.museum", - "", - "", - "l\303\270dingen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "democracia.bo", - "", - "mydatto.com", - "myftp.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "s\303\270ndre-land.no", - "", - "", - "", - "", - "", - "", - "", - "biz.zm", - "", - "", - "", - "tn.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "interactive.museum", - "", - "", - "", - "", - "trento.it", - "", - "", - "pp.ua", - "", - "game.tw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "luroy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dnsiskinky.com", - "jaworzno.pl", - "", - "", - "", - "", - "hemsedal.no", - "", - "", - 
"dynserv.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "boxfuse.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "elasticbeanstalk.com", - "", - "", - "dreamhosters.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "val-daosta.it", - "", - "pantheonsite.io", - "", - "molise.it", - "", - "", - "space-to-rent.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.cl", - "", - "", - "", - "", - "", - "meloy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "elblag.pl", - "", - "", - "", - "christiansburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mysecuritycamera.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "from-ny.net", - "", - "", - "nuernberg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "copenhagen.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mypep.link", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "indianmarket.museum", - "", - "", - "", - "suwalki.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "sopot.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "discovery.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "money.museum", - "", - "", - "", - "mil.ni", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lindesnes.no", - "", - "", - "taa.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bronnoy.no", - "", - "", - "", - "", - "", - "", - "", - "dynamisches-dns.de", - "", - "", - "", - "", - "", - "homelink.one", - "", - "", - "", - "", - "", - "", - "", - "name.pr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "odesa.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bulsan-sudtirol.it", - "", - "ac.mz", - "", - "", - "", - "", - "botanicalgarden.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lenvik.no", - "jewelry.museum", - "", - "", - "", - "", - "crafting.xyz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "noho.st", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "browsersafetymark.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "veterinaire.km", - "", - "", - "", - "", - "", - "homelinux.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tv.br", - "mil.pl", - "", - "", - "12hp.ch", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "shop.ro", - "", - "", - "", - "", - "", - "name.tj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogdns.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tsk.tr", - "li.it", - "", - "", - "", - "", - "", - "", - "mydobiss.com", - "", - "", - "", - "", - "", - "", - "newmexico.museum", - "", - "", - "", - "", - "", - "", - "lombardy.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "voagat.no", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.hn", - "", - "", - "nore-og-uvdal.no", - "", - "", - "", - "", - "mantova.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "balsan-s\303\274dtirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vinnytsia.ua", - "tm.mg", - "", - "", - "", - "", - "\345\200\213\344\272\272.hk", - "", - "", - "", - "", - "", - "", - "", - "in-dsl.de", - "quicksytes.com", - "", - "", - "", - "", - "", - "lancashire.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "orskog.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mydatto.net", - "bulsan-suedtirol.it", - "", - "", - "\345\214\227\346\265\267\351\201\223.jp", - "groks-this.info", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "malselv.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\347\265\204\347\271\224.tw", - "massacarrara.it", - "", - "", - "", - "", - "", - "", - "verona.it", - "", - "", - "", - "", - "", - "", - "", - "tm.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "beep.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "azerbaijan.su", - "", - "", - "", - "", - "", - "name.jo", - "", - "", - "ha.cn", - "", - "", - "", - "", - "", - "", - "", - "hjelmeland.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kochi.jp", - "", - "", - "", - "", - "zagan.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firm.nf", - "", - "", - "", - "myiphost.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vanylven.no", - "olsztyn.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barsy.shop", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "groks-the.info", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sumy.ua", - "", - "", - 
"", - "indigena.bo", - "", - "", - "drud.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lecce.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tc.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sand\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "village.museum", - "", - "tonsberg.no", - "freebox-os.com", - "", - "", - "", - "", - "", - "", - "", - "", - "is-slick.com", - "", - "", - "", - "", - "", - "", - "", - "", - "org.tj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "konskowola.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "porsanger.no", - "", - "", - "", - "", - "", - "", - "tv.bo", - "", - "", - "", - "", - "", - "", - "", - "mydrobo.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kinghost.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.ba", - "", - "", - "blogspot.cz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "opole.pl", - "", - "", - "", - "terni.it", + "ro.it", "", "", "", @@ -37283,12 +38313,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rm.it", "", "", "", "", "", "", + "ibestad.no", "", "", "", @@ -37296,16 +38328,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "bitter.jp", "", - "poltava.ua", "", "", "", "", - "harstad.no", "", "", "", + "demon.nl", "", "", "", @@ -37313,26 +38345,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "nara.jp", "", "", "", "", - "strand.no", "", "", "", "", - "i234.me", "", - "cloudaccess.host", "", "", "", + "framer.media", "", "", "", "", "", + "fastvps-server.com", "", "", "", @@ -37348,12 +38380,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "kirkenes.no", "", "", + "pro.om", "", "", - "dvrdns.org", "", "", "", @@ -37362,9 +38393,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pu.it", "", "", "", + "fortal.br", + "po.it", "", "", "", @@ -37374,23 +38408,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pd.it", "", "", - "trentinos\303\274dtirol.it", "", "", "", "", "", - "of.work", "", "", "", - "trentinosud-tirol.it", "", "", + "firewall-gateway.com", "", + "re.it", "", + "apigee.io", "", "", "", @@ -37408,6 +38443,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hosting-cluster.nl", + "museum.mv", "", "", "", @@ -37416,6 +38453,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "half.host", "", 
"", "", @@ -37434,6 +38472,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "bozen-suedtirol.it", "", "", "", @@ -37447,29 +38486,30 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "int.ve", "", "", "", "", + "vall\303\251edaoste.it", "", "", "", - "halsa.no", "", "", "", "", "", - "lg.jp", "", "", "", - "dnsalias.org", + "milan.it", "", "", "", "", "", + "wpenginepowered.com", "", "", "", @@ -37483,13 +38523,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pe.it", "", - "futuremailing.at", "", "", "", "", "", + "firewall-gateway.de", "", "", "", @@ -37510,33 +38551,35 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dyn-o-saur.com", "", "", - "macerata.it", "", "", + "is-saved.org", "", + "mediatech.by", "", "", "", - "store.bb", "", + "tokke.no", "", "", "", "", "", + "pt.it", "", - "v.bg", "", "", "", "", "", "", + "firewall-gateway.net", "", "", + "komvux.se", "", "", "", @@ -37561,8 +38604,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "barrell-of-knowledge.info", + "babymilk.jp", "", - "mediocampidano.it", "", "", "", @@ -37574,7 +38618,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "homelinux.com", "", "", "", @@ -37594,16 +38637,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "shop.pl", "", "", - "trentinosuedtirol.it", "", - "museet.museum", "", - "pub.sa", - "myftp.org", "", "", + "org.ni", "", "", "", @@ -37612,24 +38653,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "assassination.museum", "", "", "", "", "", "", - "mb.ca", "", "", "", - "vestv\303\245g\303\270y.no", - "discourse.group", "", - "wmflabs.org", "", "", - "frei.no", "", "", "", @@ -37643,9 +38678,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "plants.museum", "", "", + "backdrop.jp", "", "", "", @@ -37653,8 +38688,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "indie.porn", "", - "mjondalen.no", "", "", "", @@ -37668,12 +38703,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "sciencesnaturelles.museum", - "museum.no", "", "", + "fjaler.no", "", - "monzaedellabrianza.it", "", "", "", @@ -37692,23 +38725,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "org.dz", "", "", "", "", "", "", + "ip6.arpa", "", "", "", "", "", "", - "ditchyourip.com", "", "", "", + "deno.dev", "", "", "", @@ -37716,19 +38749,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "in-vpn.org", - "ostrowiec.pl", "", "", "", "", "", - "oppeg\303\245rd.no", "", "", + "microlight.aero", "", - "za.bz", "", + "museum.om", "", "", "", @@ -37743,6 +38774,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "r\303\270st.no", "", "", "", @@ -37754,12 +38786,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "firm.ve", "", "", "", "", "", + "sigdal.no", "", "", "", @@ -37781,9 +38813,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "halfmoon.jp", "", "", - "tas.au", "", "", "", @@ -37791,9 +38823,12 @@ const 
char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "it.ao", + "haiphong.vn", "", "", "", + "hemnes.no", "", "", "", @@ -37801,16 +38836,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mallorca.museum", - "tv.na", "", "", + "tempurl.host", "", "", "", "", "", "", + "dnsupdate.info", "", "", "", @@ -37828,6 +38863,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pr.it", "", "", "", @@ -37837,7 +38873,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "saskatchewan.museum", "", "", "", @@ -37853,18 +38888,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "onrender.com", "", "", "", "", "", + "holmestrand.no", "", "", "", "", "", - "asso.fr", "", "", "", @@ -37875,6 +38909,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "per.la", "", "", "", @@ -37882,18 +38917,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "github.io", "", "", "", + "press.cy", "", "", "", - "szczytno.pl", - "stjohn.museum", "", "", "", "", + "org.bj", "", "", "", @@ -37926,17 +38962,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "try-snowplow.com", "", "", "", "", "", + "arvo.network", + "i.bg", "", - "tcm.museum", "", "", "", - "shiga.jp", "", "", "", @@ -37956,26 +38993,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "32-b.it", "", "", "", "", "", "", - "fedorainfracloud.org", "", "", - "academy.museum", "", - "other.nf", "", "", "", "", "", "", - "mypsx.net", "", "", "", @@ -37986,15 +39018,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "16-b.it", "", "", "", - "malvik.no", - "in-dsl.net", "", "", - "trana.no", "", "", "", @@ -38006,864 +39034,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.gh", "", "", - "the.br", - "lincoln.museum", "", "", "", - "", - "tr\303\270gstad.no", - "", - "", - "", - "", - "", - "", - "", - "te.ua", - "", - "myeffect.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "64-b.it", - "", - "mil.sh", - "", - "", - "", - "", - "", - "", - "pro.vn", - "", - "", - "vipsinaapp.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tm.pl", - "", - "", - "", - "larvik.no", - "", - "trentino.it", - "", - "missile.museum", - "", - "", - "", - "mil.eg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinosudtirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rana.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oygarden.no", - "", - "", - "", - "", - "", - "", - "", - "", - "dscloud.mobi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asso.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sibenik.museum", - "", - "", - "", - "", - "", - "", - "", - "lc.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "modena.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "pro.az", - "", - "", - "", - "", - "", - "for-the.biz", - "", - "marburg.museum", - "", - "hol.no", - "", - "", - "", - "", - "", - "", - "", - "kozow.com", - "", - "", - "", - "", - "", - "", - "is-found.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "t.se", - "", - "", - "", - "", - "", - "", - "", - "house.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "virtual-user.de", - "", - "", - "", - "freebox-os.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "melhus.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kepno.pl", - "", - "", - "catanzaro.it", - "", - "", - "", - "", - "", - "", - "org.vc", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinostirol.it", - "", - "", - "", - "tv.sd", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vb.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trogstad.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ostre-toten.no", - "", - "", - "simple-url.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.al", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "!www.ck", - "saogonca.br", - "", - "", - "", - "krasnodar.su", - "", - "", - "bydgoszcz.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dattoweb.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bozen-s\303\274dtirol.it", - "", - "balsan-sudtirol.it", - "alta.no", - "", - "", - "", - "", - "", - "pz.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "zhytomyr.ua", - "", - "", - "here-for-more.info", - "", - "", - "loppa.no", - "asso.re", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.ph", - "", - "deporte.bo", - "", - "", - "", - "", - "", - "", - "", - "szkola.pl", - "", - "", - "", - "osen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kopervik.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ringebu.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oyer.no", - "", - "", - "", - "", - "", - "", - "lillesand.no", - "", - "", - "", - "hof.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "exhibition.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vaporcloud.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "arte.bo", - "", - "", - "", - "", - "", - "", - "", - "joinville.br", - "", - "", - "", - "", - "", - "", - "", - "trentino-suedtirol.it", - "trentino-sued-tirol.it", - "", - "", - "", - "trentino-s\303\274dtirol.it", - "trentino-s\303\274d-tirol.it", - "", - "movimiento.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "k.bg", - "", + "memset.net", "", "", + "l\303\246rdal.no", "", "", + "haugesund.no", + "mypi.co", "", "", + "ownip.net", "", + "perso.ht", "", "", + "mosvik.no", "", "", "", @@ -38912,6 +39104,23 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lecco.it", + "", + "", + "", + "", + "rep.br", + "", + "", + "", + "", + "rennesoy.no", + "", + "", + "", + "", + "", + "sevastopol.ua", "", "", "", @@ -38931,10 +39140,75 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "balsan-suedtirol.it", - "industria.bo", + "\347\245\236\345\245\210\345\267\235.jp", "", - "h\303\270nefoss.no", + "", + "", + "", + "", + "", + "", + "nyanta.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "engine.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "swidnica.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "9guacu.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pv.it", "", "", "", @@ -38967,6 +39241,1008 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tv.bb", + "", + "", + "glogow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dnsdojo.org", + "lomza.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "romsa.no", + "", + "hotelwithflight.com", + "", + "", + "", + "", + "", + "chillout.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.na", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kazimierz-dolny.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hippy.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "impertrix.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "impertrixcdn.com", + "", + "", + "i234.me", + "", + "", + "", + "", + 
"", + "", + "", + "", + "dontexist.org", + "", + "rn.it", + "", + "", + "", + "", + "", + "org.sz", + "", + "", + "", + "", + "org.mz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.bz", + "", + "", + "", + "rs.ba", + "org.kz", + "", + "", + "crew.aero", + "homesecuritymac.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.uz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "drobak.no", + "", + "", + "", + "", + "", + "ngrok-free.app", + "", + "", + "", + "", + "", + "", + "pn.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hasvik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "s\303\270r-aurdal.no", + "", + "", + "", + "", + "", + "mods.jp", + "", + "", + "", + "", + "trentino-suedtirol.it", + "", + "", + "", + "", + "trentino-s\303\274dtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rc.it", + "", + "", + "", + "", + "warmia.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentino.it", + "hurdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mail.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.tj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pc.it", + "r\303\245de.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "medicina.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "revista.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dvrcam.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "handcrafted.jp", + "", + "incheon.kr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "r\303\246lingen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns.tv", + "org.iq", + "", + "", + "", + "mincom.tn", + "", + "", + "", + "", + "", + "", + "", + "reg.dk", + "", + "", + "", + "", + "", + "", + "meteorapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "r\303\241hkker\303\241vju.no", + "pl.ua", + "sandoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.zm", + "", + "", + "", + "s\303\270mna.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.zw", + "", + "mragowo.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fantasyleague.cc", + "", + "dscloud.biz", + "iwate.jp", + "", + "", + "", + "", + "", + "", + "", + "mel\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "ivgu.no", + "", + "", + "", + "", + "", + "togliatti.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentinostirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentino-sued-tirol.it", + "", + "", + "", + "", + "trentino-s\303\274d-tirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "selfip.org", + "", + "", + "", + "", + "", + "", + "", + "tp.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rg.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "friuli-v-giulia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mutual.ar", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "schoolbus.jp", + "", + "", + "", + "", + "", + "", + "", + "egoism.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pg.it", + "", + "", + "solund.no", + "", + "", + "", + "", + "", + "", + "hvaler.no", + "", + "", + "ditchyourip.com", + "", + "", + "", + "", + "", + "", "l\303\241hppi.no", "", "", @@ -38977,22 +40253,27 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pp.se", + "marche.it", + "lillesand.no", "", + "r.se", "", "", "", "", "", - "tel.tr", "", "", "", "", "", "", + "githubusercontent.com", "", "", "", + "h\303\246gebostad.no", "", "", "", @@ -39007,10 +40288,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pilot.aero", "", "", "", + "padua.it", "", "", "", @@ -39018,19 +40299,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "plo.ps", + "dattolocal.net", "", "", + "tj.cn", "", "", "", "", - "rawa-maz.pl", "", "", "", "", "", - "resindevice.io", "", "", "", @@ -39038,6 +40320,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kagoshima.jp", + "flop.jp", "", "", "", @@ -39045,6 +40329,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "babia-gora.pl", "", "", "", @@ -39052,29 +40337,35 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "diskussionsbereich.de", "", "", "", + "hashbang.sh", "", "", "", - "mil.gh", "", "", "", "", "", + "viterbo.it", + "saogonca.br", "", "", "", "", "", + "p.se", "", "", + "afjord.no", "", "", "", - "of.fashion", + "org.kp", + "likescandy.com", "", "", "", @@ -39083,12 +40374,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "appchizi.com", "", "", "", "", "", + "lakas.hu", "", "", "", @@ -39097,10 +40388,13 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "applinzi.com", "", + "thruhere.net", "", "", "", + "molise.it", "", "", "", @@ -39110,24 +40404,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.pf", "", "", "", "", "", "", - "trentino-sudtirol.it", + "tksat.bo", "", "", "", "", - "miniserver.com", "", "", "", "", - "trentins\303\274d-tirol.it", - "temp-dns.com", "", "", "", @@ -39137,7 +40429,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "oppegard.no", "", "", "", @@ -39147,6 +40438,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-slick.com", "", "", "", @@ -39162,7 +40454,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pharmacy.museum", "", "", "", @@ -39172,18 +40463,21 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hl.cn", "", "", "", + "folionetwork.site", "", + "veterinaire.km", "", "", "", "", + "pp.ru", "", "", "", + "oristano.it", "", "", "", @@ -39209,6 +40503,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ri.it", "", "", "", @@ -39222,7 +40517,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "bozen-sudtirol.it", "", "", "", @@ -39232,15 +40526,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tt.im", "", "", "", - "doomdns.org", "", "", "", - "khmelnitskiy.ua", "", "", "", @@ -39257,7 +40548,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lyngdal.no", "", "", "", @@ -39266,6 +40556,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "int.vn", "", "", "", @@ -39284,7 +40575,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "l\303\270ten.no", "", "", "", @@ -39313,18 +40603,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pi.it", "", "", "", "", - "m\303\241latvuopmi.no", "", "", "", "", "", "", - "musica.ar", "", "", "", @@ -39361,6 +40650,292 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "fedorapeople.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kaszuby.pl", + "", + "s\303\270gne.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mond.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tourism.tn", + "elblag.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "framer.photos", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ilovecollege.info", + "malopolska.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "static.land", + "org.az", + "", + "", + "", + "", + "malbork.pl", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "venice.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mobi.tt", + "", + "", + "", + "ra.it", + "vega.no", + "", + "", + "", + "\347\247\213\347\224\260.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "massa-carrara.it", + "", + "", + "", + "", + "mycd.eu", + "", + "\330\247\331\212\330\261\330\247\331\206.ir", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ppg.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "dyn-vpn.de", "", "", @@ -39379,14 +40954,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pol.ht", "", "", "", - "military.museum", "", "", "", + "rl.no", "", + "pa.it", "", "", "", @@ -39402,25 +40979,29 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rel.ht", "", "", "", "", - "kongsvinger.no", + "internet.in", "", "", "", "", + "writesthisblog.com", "", "", "", "", "", + "oz.au", "", "", "", "", "", + "tychy.pl", "", "", "", @@ -39428,6 +41009,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "perso.tn", "", "", "", @@ -39439,18 +41021,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "school.nz", "", "", "", "", "", "", + "andria-barletta-trani.it", "", - "tw.cn", "", "", - "mydissent.net", "", "", "", @@ -39474,7 +41054,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "free.hr", "", "", "", @@ -39491,10 +41070,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "drud.us", "", "", - "music.museum", "", "", "", @@ -39519,2356 +41096,55 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "parma.it", "", "", "", "", "", "", + "tv.in", "", "", "", "", + "deta.dev", "", "", "", "", - "\346\204\233\345\252\233.jp", + "lcube-server.de", + "discourse.group", "", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\346\204\233\347\237\245.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\346\262\226\347\270\204.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "landes.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "evje-og-hornnes.no", - "", - "", - "", - "", - "tmp.br", - "", - "", - "", - "", - "h\303\244kkinen.fi", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"trentino-stirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "uslivinghistory.museum", - "", - "lakas.hu", - "", - "karate.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "boleslawiec.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loten.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentins\303\274dtirol.it", - "", - "", - "", - "", - "", - "", - "", - "krakow.pl", - "", - "", - "", - "fastly-terrarium.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "historichouses.museum", - "", - "", - "masfjorden.no", - "", - "", - "", - "lunner.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "depot.museum", - "", - "asso.mc", - "", - "", - "", - "", - "", - "", - "dev-myqnapcloud.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "opencraft.hosting", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "homesecuritymac.com", - "", - "", - "", - "", - "porsgrunn.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mx.na", - "", - "", - "", - "", - "", - "1337.pictures", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lomza.pl", - "", - "", - "", - "plaza.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hyllestad.no", - "", - "", - "asso.km", - "", - "", - "", - "", - "", - "", - "", - "", - "gateway.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "homeftp.net", - "media.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "khplay.nl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "at-band-camp.net", - "health.vn", - "", - "", - "", - "", - "", - "", - "komvux.se", - "org.nz", - "per.nf", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "shimane.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cieszyn.pl", - "", - "", - "mil.mz", - "", - "", - "", - "", - "", - "name.eg", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.kz", - "", - "", - "", - "", - "", - "", - "", - "", - "feste-ip.net", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "poivron.org", - "", - "", - "", - "", - "", - "", - "", - "mywire.org", - "", - "", - "", - "", - "", - "", - "spjelkavik.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "intelligence.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "starachowice.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "police.uk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nordre-land.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asso.ci", - "", - "", - "svizzera.museum", - "", - "", - "", - "point2this.com", - "", - "", - "", - "", - "", - "", - "", - "collegefan.org", - "", - "", - "", - "", - "", - "", - "blogspot.bg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vladikavkaz.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tashkent.su", - "", - "", - "hasvik.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kr\303\245anghke.no", - "", - "\347\273\204\347\273\207.hk", - "", - "", - "", - "", - "\347\265\204\347\273\207.hk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "paris.museum", - "", - "", - "gliwice.pl", - "", - "", - "", - "", - "bjarkoy.no", - "monzaebrianza.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "voss.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "auto.pl", - "", - "", - "", - "forl\303\254-cesena.it", - "", - "", - "", - "", - "my-vigor.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "demon.nl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ltd.ng", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "manchester.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-gone.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "scrapper-site.net", - "", - "horten.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "myfritz.net", - "", - "", - "", - "", - "", - "", - "", - "", - "hellas.museum", - "", - "", - "", - "", - "githubusercontent.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asso.nc", - "", - "", - "", - "", - "", - "", - "oystre-slidre.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "maori.nz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "meteorapp.com", - "", - "", - "", - "", - "", - "", - "my-router.de", - "", - "", - "", - "", - "", - "", - "ivgu.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "name.ng", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ingatlan.hu", - "", - "in-dsl.org", - "", - "", - "", - "", - "", - "static-access.net", - "", - "", - "", - "", - "", - "", - "luster.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "worse-than.tv", - "", - "", - "", - "", - "", - "", - "tv.im", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "test-iserv.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mandal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dinosaur.museum", - "", - "reggio-emilia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kv\303\246nangen.no", - "", - "news.hu", - "", - "", - "", - "planetarium.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gob.mx", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "moskenes.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "onthewifi.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "krasnik.pl", - "", - "", - "", - "mil.ve", - "", - "", - "", - "", "dattorelay.com", "", "", "", - "mypets.ws", "", + "mywire.org", "", "", + "pointto.us", "", "", "", - "gouv.bj", "", "", "", + "cloudaccess.host", "", "", - "online.museum", "", - "b-data.io", "", "", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "chesapeakebay.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tokke.no", - "", - 
"", - "herokussl.com", - "", - "to.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kyiv.ua", - "", - "", - "", - "", - "knx-server.net", - "osaka.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "shop.ht", - "", - "", - "gallery.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "uber.space", - "", - "ebiz.tw", - "", - "", - "lindas.no", - "", - "", - "", - "", - "", - "", - "", - "", - "public.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dontexist.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "filatelia.museum", - "", - "", - "", - "", - "", - "", - "mein-vigor.de", - "trentinsued-tirol.it", - "", - "kiev.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "olkusz.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "valdaosta.it", - "", - "", - "", - "", - "", - "vall\303\251eaoste.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "te.it", - "", - "", - "", - "", - "", - "", - "hemnes.no", - "", - "", - "", - "health.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loyalist.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "xenapponazure.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "conf.au", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "knowsitall.info", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pavia.it", - "shop.th", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mymediapc.net", - "", - "barsy.club", - "", - "", - "dscloud.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "isleofman.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "watch-and-clock.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oregontrail.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aeroclub.aero", - "", - "", - "", - "", - "", - "ddns.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "swidnica.pl", - "", - "", - "", - "", - "mielec.pl", - "", - "", - 
"", - "krym.ua", - "", - "", - "", - "", - "", - "trentinsudtirol.it", - "", - "", - "", - "", - "tromso.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "veterinaire.fr", - "", - "vladikavkaz.su", - "", - "", - "", - "", - "", - "", - "", - "is-saved.org", - "", - "", - "mil.ng", "leadpages.co", "", "", @@ -41879,9 +41155,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "shizuoka.jp", "", - "hk.org", "", "", "", @@ -41892,781 +41166,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tr.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sch.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-student.com", - "", - "", - "", - "", - "hamar.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "miasta.pl", - "", - "", - "", - "", - "pimienta.org", - "", - "", - "", - "is-uberleet.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trust.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "liguria.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "defense.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mie.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "leksvik.no", - "", - "", - "", - "", - "", - "", - "", - "lib.ee", - "", - "", - "sciencecenter.museum", - "sciencecenters.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinsuedtirol.it", - "", - "okayama.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dynathome.net", - "", - "", - "", - "", - "", - "", - "", - "karasjok.no", - "", - "", - "", - "", - "", - "wiki.br", - "", - "", - "", - "", - "wiki.bo", - "", - "", - "", - "", - "", - "", - "", - "govt.nz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "plantation.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "linkyard.cloud", - "", - "", - "", - "", - "", - "krodsherad.no", - "", - "", - "", - "", - "", - "", - "", - "", - "haram.no", - "tn.it", - "", - "", - "", - "", - "", - "m.bg", - "", - "", - "", - "", - "", - "", - "", - "", - "dellogliastra.it", - "", - "trentino-aadige.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "myphotos.cc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.tz", - "", - "", - "", - "", - "photography.museum", - "marker.no", - 
"vpndns.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loabat.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bronnoysund.no", - "", - "", - "", - "", - "", - "sweetpepper.org", - "", - "", - "", - "", - "", - "2038.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "viking.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tysfjord.no", - "", - "pgafan.net", - "org.sb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.bb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firenze.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "web.za", - "", - "scienceandhistory.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "azure-mobile.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dontexist.com", - "", - "", - "", - "", - "", - "fusa.no", - "", - "", - "", - "", - "", - "", - "", - "homelinux.org", - "", - "", - "", - "", - "", - "", - "guam.gu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "friuli-veneziagiulia.it", - "friuli-venezia-giulia.it", - "", - "", - "", - "fbxos.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "p.bg", - "", - "", - "hamburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dnsking.ch", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dominic.ua", - "", "", "", "", @@ -42680,6 +41179,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "qualifioapp.com", "", "", "", @@ -42718,1593 +41218,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "liguria.it", "", - "meldal.no", - "mragowo.pl", "", "", "", "", "", "", - "vestre-slidre.no", - "", - "", - "", - "swiebodzin.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ftpaccess.cc", - "", - "", - "", - "", - "", - "", - "", - "transporte.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trustee.museum", - "", - "", - "", - "", - "", - "", - "", - "lerdal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.az", - "", - "", - "", - "", - "", - "", - "", - "linkitools.space", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lucerne.museum", - "", - "", - "", - "", - "", - "maritimo.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "honefoss.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lea\305\213gaviika.no", - "", - "", - "", - "", - "", - "", - "", - "", - "mielno.pl", - "", - "yamagata.jp", - "", - "", - "", - "", - "", - "sc.tz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "microlight.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "friuli-vgiulia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "legnica.pl", - "", - "", - "palermo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "arts.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lillehammer.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ddns.net", - "", - "", - "", - "", - "", - "", - "", - "servehumour.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "maritime.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "h\303\270ylandet.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mb.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lab.ms", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dattolocal.net", - "", - "", - "", - "", - "", - "", - "", - "paleo.museum", - "", - "", - "", - "", - "", - "fedorapeople.org", - "", - "", - "", - "", - "tranby.no", - "", - "mex.com", - "", - "", - "", - "media.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\344\275\220\350\263\200.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "marine.ru", - "", - "", - "", - "", - "", - "", - "", - "", - "\346\273\213\350\263\200.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "homebuilt.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "vall\303\251edaoste.it", - "", - "", - "", - "hopto.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "museum.mw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vagan.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ts.it", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tv.it", - "", - "", - "", - "", - "", - "kids.us", - "", - "", - "", - "", - "", - "", - "", - "org.lb", - "", - "", - "", - "", - "", - "kicks-ass.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cable-modem.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "treviso.it", - "", - "", - "", - "", - "", - "mykolaiv.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "inderoy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lpusercontent.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hurdal.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ta.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "valleedaoste.it", - "", - "leangaviika.no", - "", - "", - "", - "", - "", - "", - "", - "", - "luzern.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lewismiller.museum", - "", - "hicam.net", - "", - "", - "", - "", - "", - "", - "", - "", - "fuel.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "v-info.info", - "", - "", - "", - "", - "", - "", - "minnesota.museum", - "", - "horology.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "jelenia-gora.pl", - "", - "trentino-altoadige.it", - "trentino-alto-adige.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "travel.tt", - "", - "", - "", - "", - "", - "dnsdojo.org", - "", - "", - "", - "", - "", - "huissier-justice.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "alpha-myqnapcloud.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "historisch.museum", - "", - "palmsprings.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bozen-suedtirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "hareid.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bounty-full.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "official.academy", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "panama.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "club.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "online.th", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dd-dns.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "historisches.museum", - "nagano.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "leitungsen.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vegarshei.no", - "", - "", - "", - "", - "", - "", - "", - "", - "pilots.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hembygdsforbund.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tromsa.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "spydeberg.no", - "", - "", - "", - "", - "", - "", - "viterbo.it", - "", - "", - "", - "", - "", - "coal.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "halloffame.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hvaler.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "paderborn.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blogspot.jp", - "", - "", - "", - "", - "", - "", - "piacenza.it", - "", - "", - "", "", "", "", @@ -44316,6 +41237,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "slupsk.pl", "", "", "", @@ -44347,7 +41269,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lutsk.ua", "", "", "", @@ -44356,7 +41277,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "v\303\246r\303\270y.no", "", "", "", @@ -44364,17 +41284,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.af", "", "", "", "", "", "", + "lea\305\213gaviika.no", "", "", "", "", "", + 
"desa.id", "", "", "", @@ -44385,6 +41308,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lelux.site", "", "", "", @@ -44423,9 +41347,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lukow.pl", "", "", + "vestby.no", "", "", "", @@ -44434,12 +41358,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "aomori.jp", "", "", "", "", "", - "londrina.br", "", "", "", @@ -44448,10 +41372,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "b-data.io", "", "", "", "", + "trentinos\303\274d-tirol.it", "", "", "", @@ -44464,9 +41390,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "nesseby.no", "", "", + "dyndns.ws", "", "", "", @@ -44514,17 +41440,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "leirvik.no", "", "", "", "", - "caxias.br", "", "", "", "", - "express.aero", "", "", "", @@ -44541,6 +41464,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "o.bg", "", "", "", @@ -44549,9 +41473,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "leirfjord.no", "", + "vestv\303\245g\303\270y.no", "", + "tgory.pl", + "dsmynas.org", "", "", "", @@ -44559,8 +41485,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "myshopblocks.com", "", "", + "senasa.ar", "", "", "", @@ -44574,15 +41502,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentinosued-tirol.it", + "starachowice.pl", "", - "malbork.pl", "", "", + "osen.no", "", "", "", "", "", + "discordsez.com", "", "", "", @@ -44590,7 +41521,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "indianapolis.museum", "", "", "", @@ -44623,18 +41553,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "transport.museum", "", "", + "selfip.net", "", "", "", - "zgorzelec.pl", "", "", "", "", - "agro.pl", "", "", "", @@ -44653,6 +41581,384 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "vall\303\251e-d-aoste.it", + "", + "", + "vads\303\270.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sweetpepper.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "oyer.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tselinograd.su", + "", + "", + "", + "", + "selfip.com", + "", + "", + "yamaguchi.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vxl.sh", + "", + "", + "", + "", + "", + "", + "vipsinaapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "dellogliastra.it", + "", + "", + "", + "org.vu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "luroy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "porsangu.no", + "", + 
"", + "", + "", + "", + "dyndns-remote.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "perso.sn", + "", + "", + "", + "", + "okinawa.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sondre-land.no", + "", + "", + "appudo.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tjome.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tiengiang.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "veneto.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "film.hu", + "yandexcloud.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -44676,26 +41982,28 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "stackhero-network.com", "", "", "", "", + "lib.ee", "", "", "", "", "", + "servehumour.com", "", + "vard\303\270.no", "", "", "", "", "", + "trentinosudtirol.it", "", "", "", - "friuli-v-giulia.it", "", "", "", @@ -44703,11 +42011,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "iglesias-carbonia.it", - "laquila.it", "", "", "", + "cuiaba.br", "", "", "", @@ -44715,10 +42022,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "bern.museum", "", "", - "kvam.no", "", "", "", @@ -44727,8 +42032,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "iglesiascarbonia.it", "", - "valle-aosta.it", "", "", "", @@ -44736,9 +42041,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "vagan.no", "", "", - "pol.dz", "", "", "", @@ -44748,6 +42053,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pescara.it", "", "", "", @@ -44756,13 +42062,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "agro.bo", "", "", "", "", "", - "bolzano-altoadige.it", "", "", "", @@ -44770,6 +42074,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rel.pl", "", "", "", @@ -44810,1039 +42115,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tourism.tn", "", "", "", "", - "zaporizhzhe.ua", - "", - "", - "", - "lardal.no", - "", - "", - "", - "mil.tj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bonn.museum", - "", - "", - "", - "", - "", - "", - "", - "tranoy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "poznan.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "priv.no", 
- "", - "", - "", - "", - "", - "", - "", - "phoenix.museum", - "troitsk.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dynu.net", - "", - "", - "", - "org.zw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-doctor.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "uvic.museum", - "", - "dell-ogliastra.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "myddns.rocks", - "", - "", - "iamallama.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pgfog.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "blog.br", - "", - "", - "", - "", - "blog.bo", - "midatlantic.museum", - "fbx-os.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "e164.arpa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "botanicgarden.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "geek.nz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "toscana.it", - "", - "", - "", - "", - "", - "moss.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tselinograd.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "est-le-patron.com", - "", - "", - "", - "", - "", - "", - "", - "lavagis.no", - "", - "", - "", - "", - "", - "", - "salangen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tydal.no", - "", - "", - "", - "cnpy.gdn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mo-siemens.io", - "", - "", - "", - "tjome.no", - "", - "", - "sells-for-u.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "herad.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gorizia.it", - "", - "", - "", - "pueblo.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "andria-trani-barletta.it", - "", - "", - "", - "", - "", - "", - "", - "", - "debian.net", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "konyvelo.hu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tolga.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "holtalen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "konin.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "powiat.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "utah.museum", - "duckdns.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tempioolbia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asso.gp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mazury.pl", - "", - "teramo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "zaporizhzhia.ua", - "", - "", - "lierne.no", - "", - "school.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "plurinacional.bo", - "info.et", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "palace.museum", - "", - "", - "", - "tysvar.no", - "", - "nagasaki.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "starnberg.museum", - "", - "lahppi.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tynset.no", - "", - "", - "", - "", - "", - "", - "", - "info.tt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "l\303\270renskog.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "traniandriabarletta.it", - "", - "", - "", - "", - "", - "", - "", - "leikanger.no", - "", - "sula.no", - "", - "", - "health.nz", - "", - "sola.no", - "", "", "", "", @@ -45868,6 +42144,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "modelling.aero", "", + "principe.st", "", "", "", @@ -45891,6900 +42168,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "orkanger.no", - "", - "", - "", - "", - "", - "priv.at", - "", - "", - "", - "", - "", - "", - "", - "", - "savannahga.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hopto.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.tr", - "", - "", - "", - "", - "", - "", - "lyngen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ilawa.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "hotel.tz", - "", - "", - "", - "", - "", - "", - "", - "hjartdal.no", - "", - "", - "is-a-conservative.com", - "", - "", - "", - "barreau.bj", - "", - "", - "", - "", - "", - "", - "", - "val-d-aosta.it", - "", - "", - "", - "", - "", - "", - "mil.vc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ptplus.fit", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "repl.run", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "turen.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "medicina.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "obninsk.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "beskidy.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trysil.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "podhale.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hb.cn", - "", - "", - "", - "is-lost.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.ro", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "l.bg", - "", - "", - "", - "livorno.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "laakesvuemie.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "karlsoy.no", - "", - "", - "hadsel.no", - "", - "", - "", - "", - "herokuapp.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "colonialwilliamsburg.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pb.ao", - "", - "", - "", - "", - "overhalla.no", - "", - "in-brb.de", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lebtimnetz.de", - "", - "", - "", - "", - "\347\245\236\345\245\210\345\267\235.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "langev\303\245g.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "podzone.org", - "", - "", - "", - "", - "", - "", - "", - "", - "gniezno.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-nascarfan.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kvitsoy.no", - "", - "", - "", - "", - "", - "", - "oppdal.no", - "", - "", - "", - "", - "", - "maceio.br", - "", - "", - "", - "", - "", - "", - "", - "", - "hawaii.museum", - "", - "", - "", - "info.nr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "graz.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "homeip.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "001www.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nysa.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loginto.me", - "", - "", - "", - "", - "", - "", - "", - "tychy.pl", - "", - "", - "", - "", - "", - "", - "", - "sevastopol.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bolt.hu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "szczecin.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "pasadena.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vang.no", - "oxford.museum", - "", - "is-a-chef.net", - "", - "", - "", - "", - "", - "", - "", - "", - "juif.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"fukushima.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "york.museum", - "", - "", - "", - "touch.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pharmacien.fr", - "", - "", - "", - "", - "", - "", - "hapmir.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "aparecida.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\320\276\321\200\320\263.\321\201\321\200\320\261", - "", - "", - "", - "definima.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "maringa.br", - "", - "", - "", - "", - "", - "", - "", - "", - "trader.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "h.bg", - "", - "", - "", - "", - "", - "halden.no", - "", - "", - "", - "", - "", - "", - "org.mx", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.mv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tksat.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tv.bb", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "m\303\245s\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "gifu.jp", - "", - "", - "", - "", - "", - "prochowice.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "scrapping.cc", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-caterer.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "karm\303\270y.no", - "", - "is-a-candidate.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pagespeedmobilizer.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mj\303\270ndalen.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tj.cn", - "", - "", - "", - "", - "", - "", - "", - "dontexist.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "shop.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "fhsk.se", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "homesecuritypc.com", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-painter.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "definima.net", - "myds.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.at", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "h\303\270yanger.no", - "", - "", - "", - "", - "", - "", - "logistics.aero", - "", - "mining.museum", - "", - "", - "", - "", - "", - "latina.it", - "", - "", - "", - "", - "", - "clan.rip", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "togliatti.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "heritage.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "podlasie.pl", - "hammarfeasta.no", - "", - "", - "", - "mil.nz", - "", - "or.tz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "firebaseapp.com", - "vestre-toten.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "linkyard-cloud.ch", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sondre-land.no", - "", - "turek.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hobol.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.tn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-cpa.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "port.fr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "portlligat.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "harvestcelebration.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "orx.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lund.no", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lorenskog.no", - "", - "is-a-chef.com", - "name.az", - "", - "odda.no", - "", - "", - "", - "", - "", - "", - "milano.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "technology.museum", - "", - "", - "", - "", - "", - "", - "", - "vicenza.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "termez.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "osteroy.no", - "", - "", - "", - "tysnes.no", - "", - "", - "", - "", - "", - "", - "", - "info.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tec.ve", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.co", - "", - "", - "", - "", - "", - "", - "pharmaciens.km", - "", - "", - "", - "", - "myjino.ru", - "gleeze.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "city.hu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-llama.com", - "", - "", - "", - "", - "", - "orland.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "to.work", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tj\303\270me.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "parliament.nz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-by.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-green.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cody.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "hyogo.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "patria.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stalowa-wola.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "myshopcolumnss.com", - "blogspot.bj", - "", - "", - "", - "", - "", - "tp.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lombardia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tra.kp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kongsberg.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "schweiz.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "folkebibl.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tgory.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\345\245\210\350\211\257.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lazio.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tx.us", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "palmas.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "knightpoint.systems", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sciencehistory.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "thruhere.net", - "", - "pacific.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "karpacz.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "betainabox.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "priv.me", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "torino.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "org.za", - "", - "", - "", - "", - "", - "", - "blogspot.mx", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "sells-for-less.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ogliastra.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kicks-ass.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "olawa.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "philadelphia.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tranibarlettaandria.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bill.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ivano-frankivsk.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "yamaguchi.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "info.vn", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "livinghistory.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "leasing.aero", - "", - "", - "", - "", - "", - "", - "padova.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pubol.museum", - "", - "", - "", - "", - "flog.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "friuli-vegiulia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asso.dz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-rockstar.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "potenza.it", - "", - "", - "fastvps-server.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "nyny.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "glas.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "telebit.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mangyshlak.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "laspezia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lier.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "home-webserver.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ibaraki.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "philately.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pagefrontapp.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vallee-d-aoste.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kraanghke.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pixolino.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "potager.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinos\303\274d-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oslo.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "logoip.de", - "", - "", - "", - "", - "", - "", - "tr\303\246na.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pomorskie.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vestvagoy.no", - "", - "lviv.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-ip.com", - "", - "", - "wakayama.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinosued-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dsmynas.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "karaganda.su", - "", - "", - "", - "", - "", - "trentin-s\303\274dtirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentin-sud-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trondheim.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.sd", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dynamic-dns.info", - "", - "", - "", - "", - "", - "pulawy.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "targi.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.ki", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "katowice.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentin-suedtirol.it", - "", - "", - "", - "info.ht", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "milan.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lublin.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "traeumtgerade.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lanbib.se", - "", - "", - "\320\276\320\261\321\200.\321\201\321\200\320\261", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.ke", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hagebostad.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "society.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vapor.cloud", - "", - "", - "", - "", - "", - "", - "", - "school.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-remote.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinos-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "crew.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "skoczow.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lajolla.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "olbiatempio.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentin-sudtirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.ni", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "holmestrand.no", - "philadelphiaarea.museum", - "", - "", - "", - "", - "", - "", - "khakassia.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hob\303\270l.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "bahn.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "morena.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "leczna.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "asso.bj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "repbody.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tozsde.hu", - "", - "", - "info.la", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "eating-organic.net", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "satx.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-techie.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "franziskaner.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kvinnherad.no", - "t.bg", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lodingen.no", - "", - "", - "", - "info.nf", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "rzeszow.pl", - "", - "trapani.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "embaixada.st", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trieste.it", - "", - "", - "vibo-valentia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentino-sud-tirol.it", - "", - "", - "", - "", - "", - "turin.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "torino.museum", - "", - "farm.museum", - "", - "", - "", - "kalisz.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "heimatunduhren.museum", - "homeunix.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lima-city.de", - "", - "", - "", - "", - "", - "", - "", - "is-a-photographer.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hashbang.sh", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentino-a-adige.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.zm", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "labour.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.zw", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hepforge.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-teacher.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kumamoto.jp", - "marylhurst.museum", - "", - "", - "", - "", - "pomorze.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "arvo.network", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "isernia.it", "", "", "", @@ -52860,7 +42244,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ipifony.net", "", "", "", @@ -52883,6 +42266,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "selfip.info", "", "", "", @@ -52918,13 +42302,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "living.museum", "", "", "", "", "", "", + "hadsel.no", "", "", "", @@ -52934,7 +42318,1519 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "przeworsk.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dynamic-dns.info", + "", + "", + "betainabox.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lyngdal.no", + "", + "barrel-of-knowledge.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "l\303\270renskog.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tolga.no", + "", + "vall\303\251e-aoste.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "inderoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "genova.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "blogspot.mx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "opole.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "malvik.no", + "mediocampidano.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "valleedaoste.it", + "", + "", + "tran\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.gp", + "", + "prequalifyme.today", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tickets.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stripper.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fireweb.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "supersale.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "turek.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kalisz.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lund.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "czeladz.pl", + "", + "", + "ovre-eiker.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-student.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pp.ua", + "", + "", + "", + "", + "", + "rauma.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tra.kp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tec.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "varese.it", + "", + "szex.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "og.ao", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wegrow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "health.nz", + "", + "", + "", + "", + "", + "", + "", + "", + "r\303\270yrvik.no", + "yamanashi.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"iwi.nz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "matera.it", + "", + "", + "", + "mozilla-iot.org", + "", + "", + "", + "", + "", + "", + "", + "", + "linkyard.cloud", + "", + "", + "", + "", + "piedmont.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vall\303\251eaoste.it", + "", + "", + "", + "", + "", + "", + "", + "turin.it", + "", + "", + "", + "", + "", + "", + "", + "reservd.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "venezia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "v\303\245g\303\245.no", + "", + "temp-dns.com", + "", + "lindesnes.no", + "lukow.pl", + "", + "", + "", + "", + "", + "", + "", + "tashkent.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vicenza.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "r\303\245holt.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lpusercontent.com", + "", + "", + "", + "t.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.fj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "gloomy.jp", + "", + "", + "", + "", + "snoasa.no", + "", + "", + "", + "", + "", + "vaga.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "poltava.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "savona.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "off.ai", + "", + "", + "", + "", + "", + "", + "toolforge.org", + "", + "halden.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "snaase.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mcpe.me", + "", + "", + "", + "", + "", + "", + "", + "intl.tn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentino-sud-tirol.it", + "", + "", + "", + "", + "", + "imperia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hareid.no", + "", + "", + "", + "", + "pc.pl", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ownprovider.com", + "", + "", + "", + "", + "", + "", + "perugia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "govt.nz", + "", + "", + "", + "r\303\270yken.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loginto.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "definima.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "friuli-vgiulia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pro.na", + "", + "", + "", + "", + "", + "", + "or.tz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "targi.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-chef.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rissa.no", + "", + "", + "", + "", + "", + "", "", "myfirewall.org", "", @@ -52942,10 +43838,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "prvcy.page", "", "", "", + "shoparena.pl", "", + "legnica.pl", "", "", "", @@ -52970,6 +43869,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "l.bg", "", "", "", @@ -52993,18 +43893,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "xs4all.space", "", "", "", - "nara.jp", "", "", "", "", + "is-a-chef.net", "", "", "", "", + "r\303\270d\303\270y.no", "", "", "", @@ -53015,7 +43917,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "iobb.net", "", "", "", @@ -53027,6 +43928,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "elasticbeanstalk.com", "", "", "", @@ -53041,7 +43943,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "myqnapcloud.com", "", "", "", @@ -53077,7 +43978,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.ve", "", "", "", @@ -53085,10 +43985,2076 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "daplie.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ox.rs", + "", + "", + "", + "", + "", + "org.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "press.ma", + "", + "", + "", + "", + "", + "", + "", + "verdal.no", + "", + "", + "mc.ax", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"pz.it", + "", + "", + "", + "", + "", + "raisa.no", + "", + "", + "", + "", + "heteml.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loisirs.bj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kirovograd.ua", + "", + "", + "oygarden.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "nikita.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "trentins\303\274dtirol.it", + "", + "", + "", + "mielec.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "porsanger.no", + "", + "", + "", + "", + "", + "friuli-veneziagiulia.it", + "", + "", + "", + "", + "", + "", + "", + "verran.no", + "", + "", + "main.jp", + "", + "", + "rennes\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "mobi.na", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tinn.no", + "", + "", + "", + "", + "", + "", + "", + "", + "tydal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pokrovsk.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tx.us", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "roros.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.vc", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "definima.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "prato.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "melhus.no", + "", + "", + "", + "", + "iserv.dev", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "stackhero-network.com", + "", + "", + "", + "langev\303\245g.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "telebit.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "langson.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "meldal.no", + "", + "", + "t3l3p0rt.net", + "", + "", + "", + "", + "vallee-d-aoste.it", + "", + "", + "", + "", + "", + "", + "rendalen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns-ip.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vinhphuc.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vercel.dev", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "isteingeek.de", + "", + "", + "", + "", + "cesena-forl\303\254.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "my-firewall.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "issmarterthanyou.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns.org", + "", + "", + "", + "", + "dvrdns.org", + "", + "", + "", + "gitapp.si", + "", + "", + "", + "", + "embaixada.st", + "", + "museum.no", + "", + "oystre-slidre.no", + "", + "", + "", + "", + "", + "", + "", + "", + "leirfjord.no", + "", + "", + "", + "", + "africa.bj", + "", + "health-carereform.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lab.ms", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hapmir.no", + "", + "livorno.it", + "", + "", + "", + "", + "", + "", + "", + "", + "podzone.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rodoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mandal.no", + "", + "pstmn.io", + "ostre-toten.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "d\303\270nna.no", + "lolipop.io", + "", + "", + "", + "", + "", + "", + "", + "", + "mobi.ke", + "plesk.page", + "", + "", + "secaas.hk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.za", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "musica.ar", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentinsuedtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "marker.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "operaunite.com", + "", + "", + "", + "kanagawa.jp", + "", + "", + "", + "", + "", + "", + "", + 
"deatnu.no", + "", + "", + "", + "", + "pages.dev", + "", + "", + "", + "pors\303\241\305\213gu.no", + "", + "", + "", + "", + "", + "", + "", + "gleeze.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pro.az", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns-server.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "midtre-gauldal.no", + "", + "", + "", + "", + "", + "", + "tm.dz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "kawaiishop.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "marine.ru", + "", + "", + "", + "trentino-alto-adige.it", + "", + "", + "", + "", + "", + "", + "", + "", + "piemonte.it", + "", + "", + "leikanger.no", + "", + "trentin-s\303\274d-tirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vlog.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns-office.com", + "", + "", + "", + "", + "", + "", + "recht.pro", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ingatlan.hu", + "", + "", + "", + "", + "", + "is-gone.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns-at-work.com", + "", + "omasvuotna.no", + "org.dz", + "", + "", + "", + "", + "", + "", + "", + "rhcloud.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentin-sued-tirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "is-found.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "flight.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "redirectme.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "withyoutube.com", + "", + "", + "", + "lviv.ua", + "", + "", + "", + "", + "", + "", + "", + "l-o-g-i-n.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-conservative.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hanggliding.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pordenone.it", + "", + "", + "leksvik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.nz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "suli.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "v\303\245gan.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pavia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "onrender.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lamdong.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rakkestad.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "walbrzych.pl", + "", + "", + "", + "", + "radio.am", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mongolian.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tempioolbia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "oxa.cloud", + "", + "tv.tz", + "", + "", + "", + "", + "", + "", + "stalowa-wola.pl", + "", + "trentin-sudtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "rygge.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "secret.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "production.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "onporter.run", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "deci.jp", + "", + "", + "", + "", "", "is-a-chef.org", "", "", + "friuli-vegiulia.it", "", "", "", @@ -53097,7 +46063,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.ec", "", "", "", @@ -53121,14 +46086,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pcloud.host", "", "", "", "", "", "", - "h\303\246gebostad.no", + "akamaized-staging.net", "", "", "", @@ -53137,6 +46101,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.vi", "", "", "", @@ -53152,6 +46117,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lier.no", "", "", "", @@ -53222,17 +46188,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "odesa.ua", "", "", "", "", "", "", + "remotewd.com", "", "", "", "", "", + "osteroy.no", "", 
"", "", @@ -53247,8 +46216,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "myfritz.net", "", - "manaus.br", "", "", "", @@ -53267,6 +46236,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentinos\303\274dtirol.it", "", "", "", @@ -53275,19 +46245,20 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dell-ogliastra.it", + "trentinosud-tirol.it", + "geek.nz", "", "", "", "", - "lowicz.pl", "", - "is-a-therapist.com", "", "", "", + "holy.jp", "", "", - "info.pr", "", "", "", @@ -53296,6 +46267,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "protonet.io", "", "", "", @@ -53311,14 +46283,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "valle-daosta.it", "", "", + "\321\217.\321\200\321\203\321\201", "", "", + "modena.it", "", "", "", "", + "loten.no", "", "", "", @@ -53337,12 +46313,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "netlify.app", "", "", "", "", "", "", + "lavagis.no", "", "", "", @@ -53357,6 +46335,1701 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "strand.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentinosuedtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "m\303\241tta-v\303\241rjjat.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "friuli-venezia-giulia.it", + "", + "", + "", + "", + "", + "", + "vapor.cloud", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myhome-server.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "thick.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "laquila.it", + "", + "", + "", + "", + "", + "", + "", + "", + "trieste.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "warszawa.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pg.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hasura-app.io", + "independent-inquest.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pub.sa", + "", + "", + "", + "", + "", + "is-a-cpa.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "industria.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pymnt.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trento.it", + "dnsfor.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rotorcraft.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myshopify.com", + "appchizi.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "homeip.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pilot.aero", + "", + "", + "saga.jp", + "", + "", + "", + "", + "leitungsen.de", + "", + "", + "", + "", + "", + "", + "", + "radio.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "onred.one", + "", + "miasta.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rocky.page", + "", + "", + "", + "", + "", + "", + "ringsaker.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "isa-hockeynut.com", + "dyndns.info", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "translated.page", + "", + "", + "", + "vestvagoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.sb", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "org.bb", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "safety.aero", + "mypets.ws", + "", + "", + "", + "", + "mielno.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-caterer.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "onthewifi.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "firebaseapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "jozi.biz", + "", + "", + "", + "hob\303\270l.no", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "rec.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "voagat.no", + "", + "", + "", + "tunk.org", + "", + "trani-andria-barletta.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "shop.hu", + "", + "", + "test.tj", + "", + "", + "trondheim.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lutsk.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lombardia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\347\265\204\347\271\224.\351\246\231\346\270\257", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\346\225\231\350\202\262.\351\246\231\346\270\257", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "perma.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "penne.jp", + "", + "is-uberleet.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loppa.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "t\303\270nsberg.no", + "", + "", + "", + "", + "", + "", + "rdy.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rost.no", + "", + "", + "", + "", + "", + "", + "", + "", + "osaka.jp", + "", + "", + "", + "trading.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-rockstar.com", + "", + "", + "", + "", + "", + "", + "radoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pimienta.org", + "", + "", + "", + "", + "", + "", + "dd-dns.de", + "", + "r.bg", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tech.orange", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "myjino.ru", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pro.fj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "p.bg", + "", + "", + "", + "", + "", + "", + "", + "lodi.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-painter.com", + "", + "", + "", + "", + "", + "", + "", + "traeumtgerade.de", + "", + "", + "", + "", + "szkola.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "roan.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ravendb.community", + "", + "", + "", + "", + "", + "", + "law.za", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "orangecloud.tn", + "", + "", + "", + "", + "sebastopol.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "leasing.aero", + "", + "", + "", + "okayama.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hatinh.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "realestate.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "veterinaire.fr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "e164.arpa", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mazury.pl", + "oppeg\303\245rd.no", + "trapani.it", + "", + "", + "", + "", + "", + "translate.goog", + "", + "", + "", + "valle-d-aosta.it", + "priv.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "radio.fm", + "", + "", + "kolobrzeg.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "vibo-valentia.it", + "is-lost.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "trentino-s-tirol.it", "", "", @@ -53375,7 +48048,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-lawyer.com", "", "", "", @@ -53383,7 +48055,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "desa.id", "", "", "", @@ -53403,7 +48074,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "iron.museum", "", "", "", @@ -53427,15 +48097,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "karasjohka.no", - "localhistory.museum", "", "", "", "", "", "", - "privatizehealthinsurance.net", "", "", "", @@ -53459,9 +48126,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "flynnhub.com", + "discordsays.com", "", "", + "teaches-yoga.com", "", "", "", @@ -53483,6 +48151,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "floppy.jp", "", "", "", @@ -53493,19 +48162,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ostrowwlkp.pl", "", "", "", "", - "ascoli-piceno.it", "", "", + "is-a-nascarfan.com", "", "", "", "", "", + "itigo.jp", "", "", "", @@ -53526,7 +48195,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "barrel-of-knowledge.info", "", "", "", @@ -53543,7 +48211,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - 
"tvedestrand.no", "", "", "", @@ -53556,6 +48223,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.lb", "", "", "", @@ -53567,6 +48235,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kobierzyce.pl", "", "", "", @@ -53579,10 +48248,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tottori.jp", "", "", "", + "karpacz.pl", "", "", "", @@ -53591,12 +48260,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "homeunix.com", "", "", "", "", - "film.hu", "", "", "", @@ -53622,14 +48289,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lodingen.no", "", "", "", "", "", + "myddns.rocks", "", "", "", + "framer.app", "", "", "", @@ -53674,6 +48344,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "sadist.jp", "", "", "", @@ -53691,6 +48362,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "point2this.com", "", "", "", @@ -53699,6 +48371,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentino-sudtirol.it", "", "", "", @@ -53713,10 +48386,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pro.vn", "", "", "", "", + "trentins\303\274d-tirol.it", "", "", "", @@ -53734,961 +48409,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "twmail.cc", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "musica.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "monza-brianza.it", - "", - "townnews-staging.com", - "", - "", - "", - "", - "stuff-4-sale.us", - "", - "", - "", - "", - "", - "", - "tourism.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.ls", - "", - "", - "", - "", - "", - "theater.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "reggio-calabria.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "me.tz", - "", - "", - "", - "", - "", - "", - "", - "", - "vega.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "volyn.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.pk", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "miyagi.jp", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "name.my", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "malopolska.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "prof.pr", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-geek.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "science-fiction.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinoaadige.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "meland.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lcube-server.de", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ternopil.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tinn.no", - "", - "", - "humanities.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-nurse.com", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -54736,7 +48456,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "habmer.no", "", "", "", @@ -54748,6 +48467,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.et", "", "", "", @@ -54763,10 +48483,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "bydgoszcz.pl", "", "", "", - "lima-city.at", "", "", "", @@ -54783,17 +48503,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "myravendb.com", "", "", "", "", "", "", - "roma.museum", "", "", "", + "info.tt", "", "", "", @@ -54806,9 +48525,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "telekommunikation.museum", "", - "lodi.it", "", "", "", @@ -54837,6 +48554,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tochigi.jp", "", "", "", @@ -54844,10 +48562,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hasura.app", "", "", - "tokyo.jp", + "odda.no", "", "", "", @@ -54857,6 +48574,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentino-stirol.it", "", "", "", @@ -54876,7 +48594,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "szex.hu", + "risor.no", "", "", "", @@ -54896,19 +48614,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "vibovalentia.it", "", "", "", - "frog.museum", "", - "hattfjelldal.no", "", "", "", "", "", - "profesional.bo", "", "", "", @@ -54922,6 +48636,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "independent-inquiry.uk", "", "", "", @@ -54931,7 +48646,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "vlaanderen.museum", "", "", "", @@ -54950,7 +48664,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "historical.museum", "", "", "", @@ -54986,6 +48699,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-into-cars.com", "", "", "", @@ -55030,6 +48744,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ostroda.pl", "", "", "", @@ -55048,6 +48763,227 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "info.tr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rar.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-doctor.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentinsud-tirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mobi.ng", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ravendb.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lillehammer.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "opencraft.hosting", + "", + "", + "\345\262\251\346\211\213.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "laakesvuemie.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "run.app", + "", + "", + "", + "", + "", + "", + "", + "", "bieszczady.pl", "", "", @@ -55066,6 +49002,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "time.no", "", "", "", @@ -55083,7 +49020,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "homeftp.org", "", "", "", @@ -55115,7 +49051,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "shop.hu", "", "", "", @@ -55126,17 +49061,18 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "labor.museum", "", "", "", "", "", "", + "info.ro", "", "", "", "", + "info.mv", "", "", "", @@ -55165,12 +49101,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "twmail.net", "", "", "", "", - "film.museum", "", "", "", @@ -55188,6 +49122,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "priv.at", "", "", "", @@ -55198,7 +49133,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "homedns.org", "", "", "", @@ -55212,7 +49146,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "ostroleka.pl", "", "", "", @@ -55225,6 +49158,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ostrowiec.pl", "", "", "", @@ -55244,9 +49178,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "travel.pl", "", "", + "trentino-a-adige.it", "", "", "", @@ -55275,6 +49209,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "design.aero", "", "", "", @@ -55282,7 +49217,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mlbfan.org", "", "", "", @@ -55292,6 +49226,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "troandin.no", "", "", "", @@ -55309,11 +49244,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-green.com", "", "", "", "", - "andria-barletta-trani.it", "", "", "", @@ -55326,6 +49261,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "restaurant.bj", "", "", "", @@ -55342,8 +49278,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-democrat.com", "", + "verona.it", "", "", "", @@ -55356,7 +49292,6 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "homeunix.org", "", "", "", @@ -55366,10 +49301,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hiroshima.jp", "", "", "", "", + "radom.pl", "", "", "", @@ -55386,1314 +49323,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pesarourbino.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "topology.museum", - "", - "", - "", - "", - "trentinsud-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-geek.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "yamanashi.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "stuff-4-sale.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "priv.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "t\303\270nsberg.no", - "", - "", - "", - "", - "", - "", - "logoip.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "doesntexist.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentinoaltoadige.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "coop.py", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "macapa.br", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-office.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-free.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trading.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "kagawa.jp", - "", - "", - "", - "", - "bale.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vaga.no", - "", - "", - "", - "", - "", - "", - "", - "muni.il", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\340\271\200\340\270\231\340\271\207\340\270\225.\340\271\204\340\270\227\340\270\242", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-server.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "cesena-forli.it", - "cesena-forl\303\254.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "likes-pie.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "haugesund.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "iraq.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "politica.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "barrell-of-knowledge.info", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "m\303\241tta-v\303\241rjjat.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "abruzzo.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hoyanger.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-landscaper.com", - "", - "", - "", - "", - "", - "sebastopol.ua", - "", - "", - "", - "webhop.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "massa-carrara.it", - "", - "", - "", - "", - "", - "", - "mesaverde.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "posts-and-telecommunications.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", + "trentinsued-tirol.it", + "vagsoy.no", "", "", "", @@ -56736,14 +49370,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "name.cy", + "websozai.jp", "", "", "", @@ -56769,11 +49396,895 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lucania.it", "", "", "", - "mail.pl", + "trentinsudtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "platterp.us", + "", + "", + "presse.km", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "logistics.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tottori.jp", + "", + "", + "", + "", + "mine.nu", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "taifun-dns.de", + "rgr.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-llama.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lorenskog.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.nr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "orkanger.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "leirvik.no", + "", + "", + "", + "lebtimnetz.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "whitesnow.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "plurinacional.bo", + "volyn.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-geek.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "oppegard.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.co", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tuscany.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lovepop.jp", + "", + "is-a-geek.net", + "", + "habmer.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hiho.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "primetel.cloud", + "palermo.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "v-info.info", + "", + "is-by.us", + "", + "", + "cesena-forli.it", + "deca.jp", + "", + "pya.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentin-s\303\274dtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentin-sud-tirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"myqnapcloud.com", + "", + "", + "indigena.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "wakayama.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentin-suedtirol.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -56807,7 +50318,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "vall\303\251e-aoste.it", "", "", "", @@ -56829,6 +50339,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentinos-tirol.it", "", "", "", @@ -56849,7 +50360,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "valle-daosta.it", "", "", "", @@ -56858,726 +50368,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mo-i-rana.no", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "polkowice.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-knight.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mil.za", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-anarchist.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ham-radio-op.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "iris.arpa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "axis.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "warszawa.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "info.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "troandin.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "saves-the-whales.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.bo", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lima-city.rocks", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trani-barletta-andria.it", - "", + "pistoia.it", "", "", "", @@ -57589,7 +50383,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "dyndns-wiki.com", "", - "lpages.co", "", "", "", @@ -57608,7 +50401,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hole.no", "", "", "", @@ -57622,6 +50414,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rep.kp", "", "", "", @@ -57648,6 +50441,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hungry.jp", "", "", "", @@ -57668,6 +50462,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lima-city.de", "", "", "", @@ -57682,6 +50477,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "port.fr", "", "", "", @@ -57691,12 +50487,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kagawa.jp", "", "", "", "", "", "", + "thanhhoa.vn", "", "", "", @@ -57721,10 +50519,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "kiwi.nz", "", 
"", "", "", + "urbino-pesaro.it", "", "", "", @@ -57732,7 +50532,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "law.za", "", "", "", @@ -57748,13 +50547,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "friuli-ve-giulia.it", "", "", "", "", "", + "poivron.org", "", + "per.nf", "", "", "", @@ -57763,6 +50563,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "show.aero", + "magazine.aero", "", "", "", @@ -57779,7 +50581,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tourism.bj", "", + "snowflake.app", "", "", "", @@ -57794,7 +50598,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lugansk.ua", "", "", "", @@ -57810,7 +50613,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "levanger.no", "", "", "", @@ -57833,9 +50635,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "iamallama.com", "", "", - "pisa.it", "", "", "", @@ -57859,8 +50661,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-teacher.com", "", "", + "tourism.pl", "", "", "", @@ -57871,7 +50675,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-patsfan.org", "", "", "", @@ -57879,6 +50682,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "oslo.no", "", "", "", @@ -57907,6 +50711,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ravendb.cloud", "", "", "", @@ -57920,6 +50725,190 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "independent-review.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "mobi.gp", + "", + "", + "trycloudflare.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "londrina.br", + "", + "", + "", + "", + "riopreto.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "watson.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "priv.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "overhalla.no", + "pantheonsite.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.at", + "", "", "", "", @@ -57946,7 +50935,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "poniatowa.pl", "", "", "", @@ -57963,7 +50951,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pisz.pl", "", "", "", @@ -57979,7 +50966,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "isla.pr", "", "", "", @@ -58130,7 +51116,6 @@ 
const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "twmail.org", "", "", "", @@ -58147,8 +51132,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "leka.no", "", "", + "perspecta.cloud", "", "", "", @@ -58159,21 +51146,24 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "punyu.jp", "", "", + "resto.bj", "", "", "", + "dyndns-free.com", "", "", "", "", "", "", + "lolipopmc.jp", "", "", "", - "t3l3p0rt.net", "", "", "", @@ -58207,14 +51197,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tcp4.me", "", "", "", "", - "venezia.it", "", "", + "trentino-aadige.it", "", + "museum.mw", "", "", "", @@ -58244,8 +51236,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-personaltrainer.com", - "tingvoll.no", "", "", "", @@ -58255,7 +51245,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "hobby-site.com", "", "", "", @@ -58263,6 +51252,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "porsgrunn.no", "", "", "", @@ -58274,6 +51264,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lazio.it", "", "", "", @@ -58286,7 +51277,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "on-aptible.com", "", "", "", @@ -58294,6 +51284,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tvedestrand.no", "", "", "", @@ -58314,6 +51305,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "royrvik.no", "", "", "", @@ -58342,6 +51334,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rec.nf", "", "", "", @@ -58352,6 +51345,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "in-vpn.org", "", "", "", @@ -58363,7 +51357,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "kaas.gg", "", "", "", @@ -58371,586 +51364,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "travel.tt", "", "", + "asso.dz", "", "", "", "", "", "", - "lebork.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "selfip.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "walbrzych.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-player.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "my-wan.de", - "", - "mycd.eu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "ketrzyn.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tochigi.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "time.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "textile.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "passenger-association.aero", - "", - "", - "", - "", - "", + "olsztyn.pl", "", "", "", @@ -58980,6 +51404,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lur\303\270y.no", "", "", "", @@ -58994,10 +51419,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "official.ec", "", "", "", "", + "in-vpn.de", "", "", "", @@ -59017,10 +51444,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "maceio.br", "", "", "", "", + "presse.ml", "", "", "", @@ -59043,6 +51472,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tokyo.jp", "", "", "", @@ -59071,8 +51501,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "traniandriabarletta.it", "", "", + "dgca.aero", "", "", "", @@ -59102,6 +51534,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "podlasie.pl", "", "", "", @@ -59116,6 +51549,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "school.nz", "", "", "", @@ -59134,9 +51568,11 @@ 
const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\345\200\213\344\272\272.\351\246\231\346\270\257", "", "", "", + "is-a-democrat.com", "", "", "", @@ -59161,6 +51597,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "framer.website", "", "", "", @@ -59175,6 +51612,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "repl.co", "", "", "", @@ -59187,10 +51625,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "obninsk.su", "", "", "", "", + "iobb.net", "", "", "", @@ -59213,6 +51653,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "townnews-staging.com", + "laocai.vn", "", "", "", @@ -59220,7 +51662,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "vlog.br", "", "", "", @@ -59228,7 +51669,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "leka.no", "", "", "", @@ -59277,9 +51717,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ternopil.ua", "", "", "", + "sblo.jp", "", "", "", @@ -59318,10 +51760,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "itcouldbewor.se", "", "", + "is-very-sweet.org", "", - "thingdustdata.com", "", "", "", @@ -59342,6 +51785,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "linkyard-cloud.ch", "", "", "", @@ -59350,10 +51794,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dyndns-web.com", "", "", "", "", + "playstation-cloud.com", "", "", "", @@ -59363,7 +51809,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "fantasyleague.cc", "", "", "", @@ -59380,7 +51825,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dyndns.biz", "", "", "", @@ -59405,6 +51849,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tromso.no", "", "", "", @@ -59427,6 +51872,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.na", "", "", "", @@ -59435,6 +51881,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "panel.gg", + "rome.it", "", "", "", @@ -59453,6 +51901,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "realm.cz", "", "", "", @@ -59473,6 +51922,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "name.az", "", "", "", @@ -59482,7 +51932,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.zm", "", "", "", @@ -59522,6 +51971,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dyndns-at-home.com", "", "", "", @@ -59552,6 +52002,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "versus.jp", "", "", "", @@ -59584,13 +52035,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "khmelnytskyi.ua", "", "", - "kolobrzeg.pl", "", "", - "silk.museum", "", "", "", @@ -59640,6 +52088,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + 
"recreation.aero", "", "", "", @@ -59647,73 +52096,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-designer.com", "", "", "", "", - "historicalsociety.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-into-cars.com", - "", - "", - "", - "", - "", - "", - "", - "", - "is-into-cartoons.com", - "", - "", - "", - "", - "", - "tran\303\270y.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "publishproxy.com", "", "", "", @@ -59728,6 +52114,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ravendb.run", "", "", "", @@ -59747,6 +52134,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "likes-pie.com", "", "", "", @@ -59776,6 +52164,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "danang.vn", "", "", "", @@ -59799,620 +52188,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-blogger.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vallee-aoste.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lavangen.no", - "", - "", - "", - "", - "", - "", - "", - "klodzko.pl", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hanggliding.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "history.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mypi.co", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "m\304\201ori.nz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "is-a-geek.org", "", "", @@ -60436,1556 +52211,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "suli.hu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "opoczno.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hosting-cluster.nl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "matta-varjjat.no", - "", - "", - "", - "", - "", - "lima-city.ch", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pittsburgh.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kids.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "motorcycle.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-socialist.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dlugoleka.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "vall\303\251e-d-aoste.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loginline.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "my-gateway.de", - "paragliding.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "publ.pt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.au", - "", - "", - "ishikawa.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "malatvuopmi.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trolley.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-home.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hokkaido.jp", - "", - "", - "", - "", - "", - "", - "", - "taifun-dns.de", - "", - "", - "", - "", - "", - "mobi.tt", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-guru.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "test.tj", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-at-work.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "babia-gora.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -62003,11 +52228,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "deta.app", "", "", "", "", "", + "medio-campidano.it", "", "", "", @@ -62040,17 +52267,22 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ogliastra.it", "", "", + "lenvik.no", "", + "mlbfan.org", "", "", "", "", "", + "ringerike.no", "", "", "", + "info.sd", "", "", "", @@ -62061,7 +52293,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-candidate.org", "", + "vallee-aoste.it", + "theshop.jp", "", "", "", @@ -62086,6 +52321,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "prochowice.pl", "", "", "", @@ -62238,6 +52474,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pol.dz", "", "", "", @@ -62275,6 +52512,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "prof.pr", "", "", "", @@ -62323,7 +52561,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lezajsk.pl", "", "", "", @@ -62354,6 +52591,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "levanger.no", "", "", "", @@ -62385,223 +52623,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "kobierzyce.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-web.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loginline.site", - "", - "", - "", - "of.football", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-an-accountant.com", - "", - "", - "", - "", - "", - "", - "", - "", - "\320\260\320\272.\321\201\321\200\320\261", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\320\276\320\264.\321\201\321\200\320\261", - "", - "", - "", - "", - "", "", "", "", @@ -62623,10 +52644,9 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "luxembourg.museum", "", + "insurance.aero", "", - "tjeldsund.no", "", "", "", @@ -62646,6 +52666,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "milano.it", "", "", "", @@ -62654,8 +52675,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.ke", "", - "hobby-site.org", "", "", "", @@ -62667,6 +52688,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ilawa.pl", "", "", "", @@ -62675,6 +52697,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.ht", "", "", "", @@ -62707,8 +52730,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-hunter.com", "", + "is-a-therapist.com", "", "", "", @@ -62757,12 +52780,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ipifony.net", "", "", "", "", "", "", + "readmyblog.org", "", "", "", @@ -62770,6 +52795,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "in-vpn.net", "", "", "", @@ -62821,7 +52847,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-not-certified.com", "", "", "", @@ -62859,7 +52884,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.cx", "", "", "", @@ -62934,6 +52958,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentinoaltoadige.it", + "caxias.br", "", "", "", @@ -62964,6 +52990,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "in-dsl.org", "", "", "", @@ -63029,24 +53056,26 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.gu", "", "", "", "", "", "", + "lima-city.at", "", "", "", "", "", "", + "in-dsl.de", "", "", "", "", "", + "nagano.jp", "", "", "", @@ -63065,7 +53094,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "toyama.jp", "", "", "", @@ -63107,6 +53135,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lucania.it", "", "", "", @@ -63155,7 +53184,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.tz", "", "", "", @@ -63170,7 +53198,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "priv.hu", "", "", "", @@ -63207,6 +53234,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "babyblue.jp", "", "", "", @@ -63227,11 +53255,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "readymade.jp", "", "", "", "", "", + "webhop.org", "", "", "", @@ -63263,6 +53293,784 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dyndns-home.com", + "", + "", + "tingvoll.no", + "", + "", + "idrett.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "webhop.me", + "", + "", + "", + "", + "leangaviika.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "is-an-actress.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "magnet.page", + "tranby.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ivano-frankivsk.ua", + "", + "", + "", + "", + "", + "", + "", + "info.ve", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dlugoleka.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "longan.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.ki", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "orkdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "doesntexist.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rade.no", + "", + "", + "", + "", + "", + "", + "tynset.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tranibarlettaandria.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "olecko.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "l\303\270ten.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "independent-commission.uk", + "", + "", + "info.la", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trani-barletta-andria.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "reklam.hu", + "", + "", + "debian.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "dyndns-blog.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "larvik.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tana.no", + "", "", "", "", @@ -63311,6 +54119,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "other.nf", "", "", "", @@ -63373,6 +54182,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "puglia.it", "", "", "", @@ -63407,6 +54217,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trysil.no", "", "", "", @@ -63421,12 +54232,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "saga.jp", "", "", "", "", "", + "velvet.jp", "", "", "", @@ -63440,12 +54251,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "potager.org", "", "", "", "", "", "", + "ravenna.it", "", "", "", @@ -63455,7 +54268,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "timekeeping.museum", "", "", "", @@ -63467,6 +54279,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lunner.no", "", "", "", @@ -63485,7 +54298,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mine.nu", "", "", "", @@ -63533,6 +54345,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "hlx3.page", "", "", "", @@ -63546,6 +54359,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentino-altoadige.it", "", "", "", @@ -63557,7 +54371,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-with-theband.com", "", "", "", @@ -63579,9 +54392,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-bookkeeper.com", "", "", + "travel.in", "", "", "", @@ -63601,6 +54414,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tromsa.no", "", "", "", @@ -63626,7 +54440,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "in-dsl.net", "", + "info.ls", "", "", "", @@ -63678,6 +54494,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tysvar.no", "", "", "", @@ -63691,7 +54508,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dyndns-blog.com", "", "", "", @@ -63748,7 +54564,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "loseyourip.com", "", "", "", @@ 
-63757,11 +54572,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lubin.pl", "", "", "", "", + "ibaraki.jp", "", "", "", @@ -63807,6 +54622,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-designer.com", "", "", "", @@ -63871,10 +54687,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.ni", "", "", "", "", + "podzone.org", "", "", "", @@ -63893,7 +54711,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "webhop.net", + "info.pr", "", + "pussycat.jp", "", "", "", @@ -63928,507 +54749,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "show.aero", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "medio-campidano.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-celticsfan.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kartuzy.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loginline.dev", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentin-s\303\274d-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trani-andria-barletta.it", - "", - "", - "", - "", - "", - "", - "mobi.na", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tunk.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "trentin-sued-tirol.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "theworkpc.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -64445,6 +54765,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "vpndns.net", "", "", "", @@ -64471,6 +54792,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "laspezia.it", "", "", "", @@ -64491,11 +54813,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-into-cartoons.com", "", "", "", "", "", + "lolitapunk.jp", "", "", "", @@ -64534,2248 +54858,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "loginline.services", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "in-addr.arpa", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mmafan.biz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.hu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "info.az", - "", - "", - "", - "", - "", - "", - "", - "texas.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-an-actress.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "midtre-gauldal.no", - "", - "", - "", - "", - "olbia-tempio.it", - "", - "", - "", - "", - "", - "", - "", - "trentinoa-adige.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-an-actor.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "dyndns-mail.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oita.jp", - "", - "tv.tz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "half.host", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "myhome-server.de", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "\321\203\320\277\321\200.\321\201\321\200\320\261", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kiwi.nz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kazimierz-dolny.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kirovograd.ua", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "urbino-pesaro.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "manx.museum", - "", - "", - "", - "historyofscience.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "monza-e-della-brianza.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pruszkow.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-soxfan.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-libertarian.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hamaroy.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pony.club", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tempio-olbia.it", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lubartow.pl", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hasura-app.io", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-financialadvisor.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "is-a-blogger.com", "", "", "", @@ -66849,14 +54932,17 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "oksnes.no", "", "", "", "", + "\347\273\204\347\271\224.hk", "", "", "", "", + "\347\265\204\347\271\224.hk", "", "", "", @@ -66866,6 +54952,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\346\225\231\350\202\262.hk", "", "", "", @@ -66900,6 +54987,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\346\225\216\350\202\262.hk", "", "", "", @@ -66918,6 +55006,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "on-aptible.com", "", "", "", @@ -66936,7 +55025,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "trentinoalto-adige.it", "", "", "", @@ -66979,14 +55067,16 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dyndns-at-home.com", "", + "littlestar.jp", "", + "lugansk.ua", "", "", "", "", "", + "research.aero", "", "", "", @@ -67071,7 +55161,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "la-spezia.it", "", "", "", @@ -67086,12 +55175,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "post.in", "", "", "", "", "", "", + "pgfog.com", + "olawa.pl", "", "", "", @@ -67108,6 +55200,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "opensocial.site", "", "", "", @@ -67154,7 +55247,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mozilla-iot.org", "", "", "", @@ -67171,6 +55263,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "org.mx", "", "", "", @@ -67208,6 +55301,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "musica.bo", "", "", "", @@ -67225,11 +55319,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "termez.su", "", "", "", "", "", + "info.tn", "", "", "", @@ -67270,7 +55366,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-bruinsfan.org", "", "", "", @@ -67281,6 +55376,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "podhale.pl", "", "", "", @@ -67290,6 +55386,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "teramo.it", "", "", "", @@ -67303,7 +55400,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tcp4.me", "", "", "", @@ -67324,16 +55420,16 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-musician.com", "", "", + "webhop.info", "", + "luster.no", "", "", "", "", "", - "mobi.ke", "", "", "", @@ -67348,12 +55444,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "roma.it", "", "", "", "", "", "", + "my-wan.de", "", "", "", @@ -67364,7 +55462,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "on-web.fr", "", "", "", @@ -67373,12 +55470,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.pk", "", "", "", "", + "pagespeedmobilizer.com", "", "", + "rivne.ua", "", "", "", @@ -67389,6 +55489,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "parliament.nz", "", "", "", @@ -67400,6 +55501,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "meland.no", "", "", "", @@ -67408,13 +55510,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-liberal.com", + "thuathienhue.vn", "", "", "", "", "", "", + "info.ec", "", "", "", @@ -67434,6 +55537,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "thaibinh.vn", "", "", "", @@ -67512,6 +55616,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ondigitalocean.app", "", "", "", @@ -67547,6 +55652,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "priv.pl", "", "", "", @@ -67555,556 +55661,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-very-bad.org", "", "", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tana.no", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "television.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mazowsze.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "teaches-yoga.com", 
- "losangeles.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "office-on-the.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "kanagawa.jp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "potenza.it", "", "", "", @@ -68150,16 +55714,19 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\340\271\200\340\270\231\340\271\207\340\270\225.\340\271\204\340\270\227\340\270\242", "", "", "", "", "", "", + "is-a-nurse.com", "", "", "", "", + "lavangen.no", "", "", "", @@ -68180,12 +55747,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "riik.ee", "", "", "", "", "", "", + "tj\303\270me.no", + "is-with-theband.com", "", "", "", @@ -68225,6 +55795,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "dopaas.com", "", "", "", @@ -68317,6 +55888,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "zaporizhzhia.ua", "", "", "", @@ -68339,7 +55911,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "mill.museum", "", "", "", @@ -68394,7 +55965,9 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.vn", "", + "is-a-photographer.com", "", "", "", @@ -68447,6 +56020,1812 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ibxos.it", + "", + "", + "", + "", + "", + "", + "lima-city.ch", + "", + "", + "is-a-lawyer.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "digick.jp", + "", + "", + "", + "", + "", + "", + "", + "odessa.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ruovat.no", + "", + "", + "", + "", + "lerdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "omniwe.site", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loabat.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hasura.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tonkotsu.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "isla.pr", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "osoyro.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "morena.br", + "", + "", + "iopsys.se", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-socialist.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loginline.dev", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "activetrail.biz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-personaltrainer.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tysnes.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pb.ao", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "read-books.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "school.na", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "laichau.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-techie.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "randaberg.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tokushima.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-guru.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "manaus.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "presse.ci", + "", + "", + "", + "", + "", + "", + "", + "myfast.space", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rad\303\270y.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loginline.io", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "trentinoaadige.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "torino.it", + "", + "rovno.ua", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trader.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "info.in", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ragusa.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.nf", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tjeldsund.no", + "", + "", + "", + "", + "info.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\320\260\320\272.\321\201\321\200\320\261", + "", + "", + "", + "", + "\320\276\320\264.\321\201\321\200\320\261", + "", + "", + "", + "lohmus.me", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rahkkeravju.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "thingdustdata.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pagefrontapp.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pruszkow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "oops.jp", + "", + "", + "dyndns-mail.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "onflashdrive.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\320\276\321\200\320\263.\321\201\321\200\320\261", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "opoczno.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lima-city.rocks", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ralingen.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"vercel.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -68524,6 +57903,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lubin.pl", "", "", "", @@ -68598,6 +57978,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "macapa.br", + "lindas.no", "", "", "", @@ -68657,6 +58039,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rovigo.it", "", "", "", @@ -68691,6 +58074,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.bo", "", "", "", @@ -68718,11 +58102,13 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rennebu.no", "", "", "", "", "", + "myfast.host", "", "", "", @@ -68756,7 +58142,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "info.bb", "", "", "", @@ -68784,6 +58169,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pigboat.jp", "", "", "", @@ -68851,6 +58237,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "vibovalentia.it", "", "", "", @@ -68865,10 +58252,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dgca.aero", "", "", "", + "onfabrica.com", "", "", "", @@ -68945,6 +58332,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "postman-echo.com", "", "", "", @@ -68977,9 +58365,11 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-an-actor.com", "", "", "", + "rana.no", "", "", "", @@ -68991,7 +58381,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "kagoshima.jp", "", "", "", @@ -69030,6 +58419,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "twmail.cc", "", "", "", @@ -69097,6 +58487,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pisz.pl", "", "", "", @@ -69116,6 +58507,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\345\272\203\345\263\266.jp", "", "", "", @@ -69124,8 +58516,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "iris.arpa", "", "", + "lardal.no", "", "", "", @@ -69138,6 +58532,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\346\240\203\346\234\250.jp", "", "", "", @@ -69164,6 +58559,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "official.academy", "", "", "", @@ -69175,6 +58571,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "osasco.br", "", "", "", @@ -69209,6 +58606,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "thanhphohochiminh.vn", "", "", "", @@ -69239,6 +58637,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-into-anime.com", "", "", "", @@ -69303,7 +58702,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-an-entertainer.com", "", "", "", @@ -69324,6 +58722,7 @@ 
const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\345\215\203\350\221\211.jp", "", "", "", @@ -69492,6 +58891,2441 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pdns.page", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "przeworsk.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "reggioemilia.it", + "", + "", + "", + "", + "", + "", + "", + "phutho.vn", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "saloon.jp", + "", + "", + "", + "", + "is-very-nice.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\345\200\213\344\272\272.hk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pythonanywhere.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tranoy.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pomorskie.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "reggio-emilia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tr\303\246na.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "poniatowa.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "police.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lyngen.no", + "", + "", + "", + "", + "publ.pt", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.fj", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "office-on-the.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "m\304\201ori.nz", + "ris\303\270r.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ladesk.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "malatvuopmi.no", + "", + "", + "lomo.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "vivian.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-very-good.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.zm", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pharmacien.fr", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "tank.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tozsde.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pisa.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "privatizehealthinsurance.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-an-accountant.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tuxfamily.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "olkusz.pl", + "", + "", + "", + "is-a-knight.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "olbiatempio.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loginline.services", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "travel.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "selfip.biz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-anarchist.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ribeirao.br", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lubartow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-landscaper.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pomorze.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "theworkpc.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "in-brb.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lovesick.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "royken.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ponpes.id", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rollag.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\344\275\220\350\263\200.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\345\225\206\346\245\255.tw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "fbx-os.fr", + "", + "", + "iglesias-carbonia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ostrowwlkp.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\347\265\204\347\271\224.tw", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "miyazaki.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "land-4-sale.us", "", "", @@ -69567,6 +61401,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "school.za", "", "", "", @@ -69645,6 +61480,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-player.com", "", "", "", @@ -69659,6 +61495,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "riobranco.br", "", "", "", @@ -69749,7 +61586,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "linz.museum", "", "", "", @@ -69770,6 +61606,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "politica.bo", "", "", "", @@ -69848,6 +61685,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "twmail.org", "", "", "", @@ -69876,6 +61714,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lierne.no", "", "", "", @@ -69929,7 +61768,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "my-firewall.org", "", "", "", @@ -70035,7 +61873,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "magazine.aero", "", "", "", @@ -70099,6 +61936,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-patsfan.org", "", "", "", @@ -70178,6 +62016,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "trentinoa-adige.it", "", "", "", @@ -70188,6 +62027,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "icurus.jp", "", "", "", @@ -70232,6 +62072,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "orskog.no", "", "", "", @@ -70274,7 +62115,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "lans.museum", "", "", "", @@ -70376,6 +62216,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + 
"rawa-maz.pl", "", "", "", @@ -70396,6 +62237,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tlon.network", "", "", "", @@ -70413,6 +62255,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-bruinsfan.org", "", "", "", @@ -70435,10 +62278,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\347\273\204\347\273\207.hk", "", "", "", "", + "\347\265\204\347\273\207.hk", "", "", "", @@ -70489,6 +62334,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "phuyen.vn", "", "", "", @@ -70561,6 +62407,8 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "readthedocs.io", + "lezajsk.pl", "", "", "", @@ -70576,6 +62424,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "in-addr.arpa", "", "", "", @@ -70608,6 +62457,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mmafan.biz", "", "", "", @@ -70619,6 +62469,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "zaporizhzhe.ua", "", "", "", @@ -70658,6 +62509,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "r\303\241isa.no", "", "", "", @@ -70697,6 +62549,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rindal.no", "", "", "", @@ -70742,9 +62595,10 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pharmaciens.km", "", "", - "is-into-anime.com", + "tempio-olbia.it", "", "", "", @@ -70847,7 +62701,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "dali.museum", "", "", "", @@ -70891,6 +62744,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "latina.it", "", "", "", @@ -70994,7 +62848,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tokushima.jp", "", "", "", @@ -71020,6 +62873,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "ostroleka.pl", "", "", "", @@ -71035,6 +62889,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "miyagi.jp", "", "", "", @@ -71061,7 +62916,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tuscany.it", "", "", "", @@ -71111,7 +62965,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pesaro-urbino.it", "", "", "", @@ -71143,6 +62996,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "independent-panel.uk", "", "", "", @@ -71165,10 +63019,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "profesional.bo", "", "", "", "", + "info.bj", "", "", "", @@ -71245,7 +63101,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "pila.pl", "", "", "", @@ -71264,6 +63119,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "oita.jp", "", "", "", @@ -71279,6 +63135,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "twmail.net", "", "", "", @@ -71325,7 +63182,6 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "limanowa.pl", "", "", "", @@ -71412,7 +63268,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "parachuting.aero", "", "", "", @@ -71437,7 +63292,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "kaszuby.pl", "", "", "", @@ -71575,7 +63429,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "l-o-g-i-n.de", "", "", "", @@ -71637,6 +63490,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pgafan.net", "", "", "", @@ -71653,6 +63507,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lapy.pl", "", "", "", @@ -71674,6 +63529,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.gu", "", "", "", @@ -71711,12 +63567,14 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "loginline.site", "", "", "", "", "", "", + "lublin.pl", "", "", "", @@ -71761,6 +63619,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "powiat.pl", "", "", "", @@ -71772,6 +63631,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-an-entertainer.com", "", "", "", @@ -71782,6 +63642,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "001www.com", "", "", "", @@ -71816,6 +63677,1179 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "royal-commission.uk", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "logoip.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "hoplix.shop", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-liberal.com", + "", + "", + "", + "", + "", + "online.th", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ishikawa.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "leczna.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "polkowice.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.cx", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lowicz.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "loseyourip.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "info.au", + "", + "", + "", + "", + "priv.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "olbia-tempio.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pupu.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "router.management", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "iservschule.de", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-very-evil.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pixolino.com", + "", + "ringebu.no", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "parasite.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "turystyka.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-financialadvisor.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\346\262\226\347\270\204.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\346\204\233\345\252\233.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\346\204\233\347\237\245.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-an-engineer.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "poznan.pl", "", "", "", @@ -71973,6 +65007,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "r\303\270ros.no", "", "", "", @@ -71985,11 +65020,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "health-carereform.com", + "lebesby.no", "", "", "", @@ -72033,6 +65064,710 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rulez.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "pp.az", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "on-the-web.tv", + "", + "is-a-soxfan.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-musician.com", + "dyndns.biz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "logoip.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-an-anarchist.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "oppdal.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "trentinoalto-adige.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "matrix.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tuva.su", + "", + "", + "", + "", + "", + "", + "", + "passenger-association.aero", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "rimini.it", + "", + "pueblo.bo", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-bookkeeper.com", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -72106,7 +65841,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "oceanographique.museum", "", "", "", @@ -72197,6 +65931,7 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tuyenquang.vn", "", "", "", @@ -72264,6 +65999,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "publishproxy.com", "", "", "", @@ -72323,6 +66059,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.hu", "", "", "", @@ -72428,6 +66165,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "repl.run", "", "", "", @@ -72469,1564 +66207,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "town.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "hiroshima.jp", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "oceanographic.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "pp.az", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-a-republican.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mobi.ng", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "is-a-hunter.com", "", "", "", @@ -74071,6 +66252,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pubtls.org", "", "", "", @@ -74085,6 +66267,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rybnik.pl", "", "", "", @@ -74096,1707 +66279,15 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rackmaze.com", "", "", + "lahppi.no", "", "", "", "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "is-an-engineer.com", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "valle-d-aosta.it", - "", - "", - "", - "lapy.pl", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "moma.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mobi.gp", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "medizinhistorisches.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tuxfamily.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-very-sweet.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "resindevice.io", "", "", "", @@ -75883,6 +66374,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rackmaze.net", "", "", "", @@ -75956,6 +66448,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-not-certified.com", "", "", "", @@ -76011,6 +66504,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "lanbib.se", "", "", "", @@ -76089,6 
+66583,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "mazowsze.pl", "", "", "", @@ -76301,6 +66796,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "limanowa.pl", "", "", "", @@ -76538,7 +67034,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-an-anarchist.com", "", "", "", @@ -76611,6 +67106,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "reserve-online.com", "", "", "", @@ -76710,6 +67206,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.bb", "", "", "", @@ -76766,6 +67263,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "repbody.aero", "", "", "", @@ -76816,6 +67314,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "raindrop.jp", "", "", "", @@ -76833,6 +67332,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-libertarian.com", "", "", "", @@ -76886,6 +67386,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "reserve-online.net", "", "", "", @@ -77113,6 +67614,2001 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "rzeszow.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "orland.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "tula.su", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pila.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "piacenza.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lpages.co", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "2000.hu", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\345\245\210\350\211\257.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pesarourbino.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-celticsfan.org", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "lebork.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "public-inquiry.uk", + "", + "", + "raholt.no", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\346\273\213\350\263\200.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "typedream.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mimoza.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "mobi.tz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pulawy.pl", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", "", @@ -77419,6 +69915,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pesaro-urbino.it", "", "", "", @@ -77524,6 +70021,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "paragliding.aero", "", "", "", @@ -77668,6 +70166,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "palmas.br", "", "", "", @@ -78050,4664 +70549,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-very-nice.org", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tree.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", 
- "", - "", - "", - "", - "", - "on-the-web.tv", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "time.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "mobi.tz", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-very-bad.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "lebesby.no", - 
"in-the-band.net", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-very-good.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tank.museum", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "tuva.su", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2000.hu", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "is-very-evil.org", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", "", "", "", @@ -82768,6 +70609,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "patria.bo", "", "", "", @@ -82874,6 +70716,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "taxi.br", "", "", "", @@ -83008,6 +70851,7 @@ const char * TopLevelDomainLookupHash::isValid(const char 
* str, size_t len) "", "", "", + "recife.br", "", "", "", @@ -83043,6 +70887,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "reggiocalabria.it", "", "", "", @@ -83091,6 +70936,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "orx.biz", "", "", "", @@ -83147,6 +70993,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "thainguyen.vn", "", "", "", @@ -83233,6 +71080,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "platter-app.dev", "", "", "", @@ -83328,6 +71176,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pcloud.host", "", "", "", @@ -83652,7 +71501,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "tula.su", "", "", "", @@ -83977,6 +71825,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "platter-app.com", "", "", "", @@ -84013,6 +71862,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\320\276\320\261\321\200.\321\201\321\200\320\261", "", "", "", @@ -84223,6 +72073,2859 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "platform0.app", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "toyama.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "la-spezia.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "in-the-band.net", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "webhop.biz", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pagexl.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "parallel.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "is-a-bulls-fan.com", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pinoko.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "padova.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "pecori.jp", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "reggio-calabria.it", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "is-a-linux-user.org", "", "", @@ -84945,6 +75648,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-republican.com", "", "", "", @@ -85157,6 +75861,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "tuleap-partners.com", "", "", "", @@ -85707,6 +76412,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "iliadboxos.it", "", "", "", @@ -86450,6 +77156,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.tz", "", "", "", @@ -86774,7 +77481,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-bulls-fan.com", "", "", "", @@ -87199,7 +77905,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "miyazaki.jp", "", "", "", @@ -87486,6 +78191,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "o0o0.jp", "", "", "", @@ -87580,6 +78286,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "is-a-cubicle-slave.com", "", "", "", @@ -87668,6 +78375,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "localzone.xyz", "", "", "", @@ -87887,7 +78595,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "taxi.br", "", "", "", @@ -88408,6 +79115,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "pepper.jp", "", "", "", @@ -88723,7 +79431,6 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "turystyka.pl", "", "", "", @@ -88844,6 +79551,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "info.az", "", "", "", @@ -88950,6 +79658,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "parachuting.aero", "", "", "", @@ -89059,6 +79768,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "on-web.fr", "", "", "", @@ -89789,6 +80499,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "peewee.jp", "", "", "", @@ -89935,6 +80646,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\345\244\247\345\210\206.jp", "", "", "", @@ -89944,6 +80656,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\347\206\212\346\234\254.jp", "", "", "", @@ -90856,6 +81569,7 @@ const char * 
TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\321\201\320\260\320\274\320\260\321\200\320\260.\321\200\321\203\321\201", "", "", "", @@ -91931,10 +82645,12 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\320\261\320\270\320\267.\321\200\321\203\321\201", "", "", "", "", + "\320\272\320\276\320\274.\321\200\321\203\321\201", "", "", "", @@ -92164,6 +82880,7 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", + "\320\274\321\201\320\272.\321\200\321\203\321\201", "", "", "", @@ -95798,7 +86515,24211 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) "", "", "", - "is-a-cubicle-slave.com"}; + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\321\201\320\276\321\207\320\270.\321\200\321\203\321\201", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\320\272\321\200\321\213\320\274.\321\200\321\203\321\201", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\320\276\321\200\320\263.\321\200\321\203\321\201", + "", + "", + "", + "", + "\321\201\320\277\320\261.\321\200\321\203\321\201", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\321\203\320\277\321\200.\321\201\321\200\320\261", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "\320\274\320\270\321\200.\321\200\321\203\321\201"}; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { @@ -95814,4 +110735,4 @@ const char * TopLevelDomainLookupHash::isValid(const char * str, size_t len) } return nullptr; } -#line 4957 "tldLookup.gperf" +#line 5060 "tldLookup.gperf" diff --git a/src/Functions/URL/tldLookup.gperf b/src/Functions/URL/tldLookup.gperf index 9169770dd23..796835052e2 100644 --- a/src/Functions/URL/tldLookup.gperf +++ b/src/Functions/URL/tldLookup.gperf @@ -1,6 +1,6 @@ %language=C++ -%define lookup-function-name is_valid -%define class-name tldLookupHash +%define lookup-function-name isValid +%define class-name TopLevelDomainLookupHash %readonly-tables %includes %compare-strncmp @@ -70,7 +70,6 @@ exchange.aero express.aero federation.aero flight.aero -freight.aero fuel.aero gliding.aero government.aero @@ -144,15 +143,19 @@ og.ao co.ao pb.ao it.ao +bet.ar com.ar +coop.ar edu.ar gob.ar gov.ar int.ar mil.ar musica.ar +mutual.ar net.ar org.ar +senasa.ar tur.ar e164.arpa in-addr.arpa @@ -261,9 +264,26 @@ com.bi edu.bi or.bi org.bi -asso.bj -barreau.bj -gouv.bj +africa.bj +agro.bj +architectes.bj +assur.bj +avocats.bj +co.bj +com.bj +eco.bj +econo.bj +edu.bj +info.bj +loisirs.bj +money.bj +net.bj +org.bj +ote.bj +resto.bj +restaurant.bj +tourism.bj +univ.bj com.bm edu.bm gov.bm @@ -324,6 +344,7 @@ aju.br am.br anani.br aparecida.br +app.br arq.br art.br ato.br @@ -331,6 +352,7 @@ b.br barueri.br belem.br bhz.br +bib.br bio.br blog.br bmd.br @@ -345,14 +367,19 @@ cnt.br com.br contagem.br coop.br +coz.br cri.br cuiaba.br curitiba.br def.br +des.br +det.br +dev.br ecn.br eco.br edu.br emp.br +enf.br eng.br esp.br etc.br @@ -368,6 +395,7 @@ fot.br foz.br fst.br g12.br +geo.br ggf.br goiania.br gov.br @@ -383,6 +411,7 @@ jor.br jus.br leg.br lel.br +log.br londrina.br macapa.br maceio.br @@ -414,6 +443,7 @@ qsl.br radio.br rec.br recife.br +rep.br ribeirao.br rio.br riobranco.br @@ -424,6 +454,7 @@ santamaria.br santoandre.br saobernardo.br saogonca.br +seg.br sjc.br slg.br slz.br @@ -431,6 +462,7 @@ sorocaba.br srv.br taxi.br tc.br +tec.br teo.br the.br tmp.br @@ -497,9 +529,9 @@ md.ci gouv.ci *.ck !www.ck -gov.cl -gob.cl co.cl +gob.cl +gov.cl mil.cl co.cm com.cm @@ -575,6 +607,11 @@ org.cu net.cu gov.cu inf.cu +com.cv +edu.cv +int.cv +nome.cv +org.cv com.cw edu.cw net.cw @@ -586,10 +623,9 @@ com.cy ekloges.cy gov.cy ltd.cy -name.cy +mil.cy net.cy org.cy -parliament.cy press.cy pro.cy tm.cy @@ -608,14 +644,16 @@ net.do org.do sld.do web.do +art.dz +asso.dz com.dz +edu.dz +gov.dz org.dz net.dz -gov.dz -edu.dz -asso.dz pol.dz -art.dz +soc.dz +tm.dz com.ec info.ec net.ec @@ -662,8 +700,21 @@ name.et info.et net.et aland.fi -*.fj +ac.fj +biz.fj +com.fj +gov.fj +info.fj +mil.fj +name.fj +net.fj +org.fj +pro.fj *.fk +com.fm +edu.fm +net.fm +org.fm asso.fr com.fr gouv.fr @@ -685,6 +736,8 @@ notaires.fr pharmacien.fr port.fr veterinaire.fr +edu.gd +gov.gd com.ge edu.ge gov.ge @@ -849,6 +902,10 
@@ k12.il muni.il net.il org.il +אקדמיה.ישראל +ישוב.ישראל +צהל.ישראל +ממשל.ישראל ac.im co.im com.im @@ -856,18 +913,47 @@ net.im org.im tt.im tv.im -co.in -firm.in -net.in -org.in -gen.in -ind.in -nic.in +5g.in +6g.in ac.in +ai.in +am.in +bihar.in +biz.in +business.in +ca.in +cn.in +co.in +com.in +coop.in +cs.in +delhi.in +dr.in edu.in -res.in +er.in +firm.in +gen.in gov.in +gujarat.in +ind.in +info.in +int.in +internet.in +io.in +me.in mil.in +net.in +nic.in +org.in +pg.in +post.in +pro.in +res.in +travel.in +tv.in +uk.in +up.in +us.in eu.int com.io gov.iq @@ -1498,11 +1584,10 @@ gov.kw ind.kw net.kw org.kw -edu.ky -gov.ky com.ky -org.ky +edu.ky net.ky +org.ky org.kz edu.kz net.kz @@ -1641,554 +1726,6 @@ gov.mu ac.mu co.mu or.mu -academy.museum -agriculture.museum -air.museum -airguard.museum -alabama.museum -alaska.museum -amber.museum -ambulance.museum -american.museum -americana.museum -americanantiques.museum -americanart.museum -amsterdam.museum -and.museum -annefrank.museum -anthro.museum -anthropology.museum -antiques.museum -aquarium.museum -arboretum.museum -archaeological.museum -archaeology.museum -architecture.museum -art.museum -artanddesign.museum -artcenter.museum -artdeco.museum -arteducation.museum -artgallery.museum -arts.museum -artsandcrafts.museum -asmatart.museum -assassination.museum -assisi.museum -association.museum -astronomy.museum -atlanta.museum -austin.museum -australia.museum -automotive.museum -aviation.museum -axis.museum -badajoz.museum -baghdad.museum -bahn.museum -bale.museum -baltimore.museum -barcelona.museum -baseball.museum -basel.museum -baths.museum -bauern.museum -beauxarts.museum -beeldengeluid.museum -bellevue.museum -bergbau.museum -berkeley.museum -berlin.museum -bern.museum -bible.museum -bilbao.museum -bill.museum -birdart.museum -birthplace.museum -bonn.museum -boston.museum -botanical.museum -botanicalgarden.museum -botanicgarden.museum -botany.museum -brandywinevalley.museum -brasil.museum -bristol.museum -british.museum -britishcolumbia.museum -broadcast.museum -brunel.museum -brussel.museum -brussels.museum -bruxelles.museum -building.museum -burghof.museum -bus.museum -bushey.museum -cadaques.museum -california.museum -cambridge.museum -can.museum -canada.museum -capebreton.museum -carrier.museum -cartoonart.museum -casadelamoneda.museum -castle.museum -castres.museum -celtic.museum -center.museum -chattanooga.museum -cheltenham.museum -chesapeakebay.museum -chicago.museum -children.museum -childrens.museum -childrensgarden.museum -chiropractic.museum -chocolate.museum -christiansburg.museum -cincinnati.museum -cinema.museum -circus.museum -civilisation.museum -civilization.museum -civilwar.museum -clinton.museum -clock.museum -coal.museum -coastaldefence.museum -cody.museum -coldwar.museum -collection.museum -colonialwilliamsburg.museum -coloradoplateau.museum -columbia.museum -columbus.museum -communication.museum -communications.museum -community.museum -computer.museum -computerhistory.museum -comunicações.museum -contemporary.museum -contemporaryart.museum -convent.museum -copenhagen.museum -corporation.museum -correios-e-telecomunicações.museum -corvette.museum -costume.museum -countryestate.museum -county.museum -crafts.museum -cranbrook.museum -creation.museum -cultural.museum -culturalcenter.museum -culture.museum -cyber.museum -cymru.museum -dali.museum -dallas.museum -database.museum -ddr.museum -decorativearts.museum -delaware.museum -delmenhorst.museum -denmark.museum -depot.museum -design.museum -detroit.museum 
-dinosaur.museum -discovery.museum -dolls.museum -donostia.museum -durham.museum -eastafrica.museum -eastcoast.museum -education.museum -educational.museum -egyptian.museum -eisenbahn.museum -elburg.museum -elvendrell.museum -embroidery.museum -encyclopedic.museum -england.museum -entomology.museum -environment.museum -environmentalconservation.museum -epilepsy.museum -essex.museum -estate.museum -ethnology.museum -exeter.museum -exhibition.museum -family.museum -farm.museum -farmequipment.museum -farmers.museum -farmstead.museum -field.museum -figueres.museum -filatelia.museum -film.museum -fineart.museum -finearts.museum -finland.museum -flanders.museum -florida.museum -force.museum -fortmissoula.museum -fortworth.museum -foundation.museum -francaise.museum -frankfurt.museum -franziskaner.museum -freemasonry.museum -freiburg.museum -fribourg.museum -frog.museum -fundacio.museum -furniture.museum -gallery.museum -garden.museum -gateway.museum -geelvinck.museum -gemological.museum -geology.museum -georgia.museum -giessen.museum -glas.museum -glass.museum -gorge.museum -grandrapids.museum -graz.museum -guernsey.museum -halloffame.museum -hamburg.museum -handson.museum -harvestcelebration.museum -hawaii.museum -health.museum -heimatunduhren.museum -hellas.museum -helsinki.museum -hembygdsforbund.museum -heritage.museum -histoire.museum -historical.museum -historicalsociety.museum -historichouses.museum -historisch.museum -historisches.museum -history.museum -historyofscience.museum -horology.museum -house.museum -humanities.museum -illustration.museum -imageandsound.museum -indian.museum -indiana.museum -indianapolis.museum -indianmarket.museum -intelligence.museum -interactive.museum -iraq.museum -iron.museum -isleofman.museum -jamison.museum -jefferson.museum -jerusalem.museum -jewelry.museum -jewish.museum -jewishart.museum -jfk.museum -journalism.museum -judaica.museum -judygarland.museum -juedisches.museum -juif.museum -karate.museum -karikatur.museum -kids.museum -koebenhavn.museum -koeln.museum -kunst.museum -kunstsammlung.museum -kunstunddesign.museum -labor.museum -labour.museum -lajolla.museum -lancashire.museum -landes.museum -lans.museum -läns.museum -larsson.museum -lewismiller.museum -lincoln.museum -linz.museum -living.museum -livinghistory.museum -localhistory.museum -london.museum -losangeles.museum -louvre.museum -loyalist.museum -lucerne.museum -luxembourg.museum -luzern.museum -mad.museum -madrid.museum -mallorca.museum -manchester.museum -mansion.museum -mansions.museum -manx.museum -marburg.museum -maritime.museum -maritimo.museum -maryland.museum -marylhurst.museum -media.museum -medical.museum -medizinhistorisches.museum -meeres.museum -memorial.museum -mesaverde.museum -michigan.museum -midatlantic.museum -military.museum -mill.museum -miners.museum -mining.museum -minnesota.museum -missile.museum -missoula.museum -modern.museum -moma.museum -money.museum -monmouth.museum -monticello.museum -montreal.museum -moscow.museum -motorcycle.museum -muenchen.museum -muenster.museum -mulhouse.museum -muncie.museum -museet.museum -museumcenter.museum -museumvereniging.museum -music.museum -national.museum -nationalfirearms.museum -nationalheritage.museum -nativeamerican.museum -naturalhistory.museum -naturalhistorymuseum.museum -naturalsciences.museum -nature.museum -naturhistorisches.museum -natuurwetenschappen.museum -naumburg.museum -naval.museum -nebraska.museum -neues.museum -newhampshire.museum -newjersey.museum -newmexico.museum -newport.museum -newspaper.museum 
-newyork.museum -niepce.museum -norfolk.museum -north.museum -nrw.museum -nuernberg.museum -nuremberg.museum -nyc.museum -nyny.museum -oceanographic.museum -oceanographique.museum -omaha.museum -online.museum -ontario.museum -openair.museum -oregon.museum -oregontrail.museum -otago.museum -oxford.museum -pacific.museum -paderborn.museum -palace.museum -paleo.museum -palmsprings.museum -panama.museum -paris.museum -pasadena.museum -pharmacy.museum -philadelphia.museum -philadelphiaarea.museum -philately.museum -phoenix.museum -photography.museum -pilots.museum -pittsburgh.museum -planetarium.museum -plantation.museum -plants.museum -plaza.museum -portal.museum -portland.museum -portlligat.museum -posts-and-telecommunications.museum -preservation.museum -presidio.museum -press.museum -project.museum -public.museum -pubol.museum -quebec.museum -railroad.museum -railway.museum -research.museum -resistance.museum -riodejaneiro.museum -rochester.museum -rockart.museum -roma.museum -russia.museum -saintlouis.museum -salem.museum -salvadordali.museum -salzburg.museum -sandiego.museum -sanfrancisco.museum -santabarbara.museum -santacruz.museum -santafe.museum -saskatchewan.museum -satx.museum -savannahga.museum -schlesisches.museum -schoenbrunn.museum -schokoladen.museum -school.museum -schweiz.museum -science.museum -scienceandhistory.museum -scienceandindustry.museum -sciencecenter.museum -sciencecenters.museum -science-fiction.museum -sciencehistory.museum -sciences.museum -sciencesnaturelles.museum -scotland.museum -seaport.museum -settlement.museum -settlers.museum -shell.museum -sherbrooke.museum -sibenik.museum -silk.museum -ski.museum -skole.museum -society.museum -sologne.museum -soundandvision.museum -southcarolina.museum -southwest.museum -space.museum -spy.museum -square.museum -stadt.museum -stalbans.museum -starnberg.museum -state.museum -stateofdelaware.museum -station.museum -steam.museum -steiermark.museum -stjohn.museum -stockholm.museum -stpetersburg.museum -stuttgart.museum -suisse.museum -surgeonshall.museum -surrey.museum -svizzera.museum -sweden.museum -sydney.museum -tank.museum -tcm.museum -technology.museum -telekommunikation.museum -television.museum -texas.museum -textile.museum -theater.museum -time.museum -timekeeping.museum -topology.museum -torino.museum -touch.museum -town.museum -transport.museum -tree.museum -trolley.museum -trust.museum -trustee.museum -uhren.museum -ulm.museum -undersea.museum -university.museum -usa.museum -usantiques.museum -usarts.museum -uscountryestate.museum -usculture.museum -usdecorativearts.museum -usgarden.museum -ushistory.museum -ushuaia.museum -uslivinghistory.museum -utah.museum -uvic.museum -valley.museum -vantaa.museum -versailles.museum -viking.museum -village.museum -virginia.museum -virtual.museum -virtuel.museum -vlaanderen.museum -volkenkunde.museum -wales.museum -wallonie.museum -war.museum -washingtondc.museum -watchandclock.museum -watch-and-clock.museum -western.museum -westfalen.museum -whaling.museum -wildlife.museum -williamsburg.museum -windmill.museum -workshop.museum -york.museum -yorkshire.museum -yosemite.museum -youth.museum -zoological.museum -zoology.museum -ירושלים.museum -иком.museum aero.mv biz.mv com.mv @@ -2219,13 +1756,14 @@ org.mx gob.mx edu.mx net.mx +biz.my com.my -net.my -org.my -gov.my edu.my +gov.my mil.my name.my +net.my +org.my ac.mz adv.mz co.mz @@ -3315,12 +2853,6 @@ edu.rs gov.rs in.rs org.rs -ac.ru -edu.ru -gov.ru -int.ru -mil.ru -test.ru ac.rw co.rw coop.rw @@ -3417,14 +2949,24 @@ org.sn 
perso.sn univ.sn com.so +edu.so +gov.so +me.so net.so org.so +biz.ss +com.ss +edu.ss +gov.ss +me.ss +net.ss +org.ss +sch.ss co.st com.st consulado.st edu.st embaixada.st -gov.st mil.st net.st org.st @@ -3482,21 +3024,14 @@ ens.tn fin.tn gov.tn ind.tn +info.tn intl.tn +mincom.tn nat.tn net.tn org.tn -info.tn perso.tn tourism.tn -edunet.tn -rnrt.tn -rns.tn -rnu.tn -mincom.tn -agrinet.tn -defense.tn -turen.tn com.to gov.to net.to @@ -3587,7 +3122,6 @@ cv.ua dn.ua dnepropetrovsk.ua dnipropetrovsk.ua -dominic.ua donetsk.ua dp.ua if.ua @@ -3602,6 +3136,7 @@ kiev.ua kirovograd.ua km.ua kr.ua +kropyvnytskyi.ua krym.ua ks.ua kv.ua @@ -3738,6 +3273,7 @@ gov.vc mil.vc edu.vc arts.ve +bib.ve co.ve com.ve e12.ve @@ -3749,7 +3285,9 @@ info.ve int.ve mil.ve net.ve +nom.ve org.ve +rar.ve rec.ve store.ve tec.ve @@ -3759,18 +3297,84 @@ com.vi k12.vi net.vi org.vi +ac.vn +ai.vn +biz.vn com.vn -net.vn -org.vn edu.vn gov.vn -int.vn -ac.vn -biz.vn -info.vn -name.vn -pro.vn health.vn +id.vn +info.vn +int.vn +io.vn +name.vn +net.vn +org.vn +pro.vn +angiang.vn +bacgiang.vn +backan.vn +baclieu.vn +bacninh.vn +baria-vungtau.vn +bentre.vn +binhdinh.vn +binhduong.vn +binhphuoc.vn +binhthuan.vn +camau.vn +cantho.vn +caobang.vn +daklak.vn +daknong.vn +danang.vn +dienbien.vn +dongnai.vn +dongthap.vn +gialai.vn +hagiang.vn +haiduong.vn +haiphong.vn +hanam.vn +hanoi.vn +hatinh.vn +haugiang.vn +hoabinh.vn +hungyen.vn +khanhhoa.vn +kiengiang.vn +kontum.vn +laichau.vn +lamdong.vn +langson.vn +laocai.vn +longan.vn +namdinh.vn +nghean.vn +ninhbinh.vn +ninhthuan.vn +phutho.vn +phuyen.vn +quangbinh.vn +quangnam.vn +quangngai.vn +quangninh.vn +quangtri.vn +soctrang.vn +sonla.vn +tayninh.vn +thaibinh.vn +thainguyen.vn +thanhhoa.vn +thanhphohochiminh.vn +thuathienhue.vn +tiengiang.vn +travinh.vn +tuyenquang.vn +vinhlong.vn +vinhphuc.vn +yenbai.vn com.vu edu.vu net.vu @@ -3798,7 +3402,12 @@ edu.ws ทหาร.ไทย เน็ต.ไทย องค์กร.ไทย -*.ye +com.ye +edu.ye +gov.ye +net.ye +mil.ye +org.ye ac.za agric.za alt.za @@ -3810,6 +3419,7 @@ law.za mil.za net.za ngo.za +nic.za nis.za nom.za org.za @@ -3835,30 +3445,87 @@ org.zw cc.ua inf.ua ltd.ua +611.to +graphox.us +activetrail.biz +adobeaemcloud.com +hlx.live +adobeaemcloud.net +hlx.page +hlx3.page +adobeio-static.net +adobeioruntime.net beep.pl +airkitapps.com +airkitapps-au.com +airkitapps.eu +aivencloud.com +akadns.net +akamai.net +akamai-staging.net +akamaiedge.net +akamaiedge-staging.net +akamaihd.net +akamaihd-staging.net +akamaiorigin.net +akamaiorigin-staging.net +akamaized.net +akamaized-staging.net +edgekey.net +edgekey-staging.net +edgesuite.net +edgesuite-staging.net barsy.ca +kasserver.com +altervista.org alwaysdata.net +myamaze.net cloudfront.net elasticbeanstalk.com +awsglobalaccelerator.com +eero.online +eero-stage.online t3l3p0rt.net apigee.io +siiites.com +appspacehosted.com +appspaceusercontent.com +appudo.net on-aptible.com +gv.vc pimienta.org poivron.org potager.org sweetpepper.org myasustor.com -go-vip.co -go-vip.net -wpcomstaging.com +translated.page +autocode.dev myfritz.net +onavstack.net +ecommerce-shop.pl b-data.io backplaneapp.io balena-devices.com +rs.ba +base.ec +official.ec +buyshop.jp +fashionstore.jp +handcrafted.jp +kawaiishop.jp +supersale.jp +theshop.jp +shopselect.net +base.shop +beagleboard.io betainabox.com bnr.la +bitbucket.io blackbaudcdn.net +of.je +bluebite.io boomla.net +boutir.com boxfuse.io square7.ch bplaced.com @@ -3867,48 +3534,60 @@ square7.de bplaced.net square7.net browsersafetymark.io +cafjs.com mycd.eu +canva-apps.cn +canva-apps.com +drr.ac 
+uwu.ai carrd.co crd.co -uwu.ai +ju.mp ae.org -ar.com br.com cn.com com.de com.se de.com eu.com -gb.com gb.net -hu.com hu.net jp.net jpn.com -kr.com mex.com -no.com -qc.com ru.com sa.com se.net uk.com uk.net us.com -uy.com za.bz za.com +ar.com +hu.com +kr.com +no.com +qc.com +uy.com africa.com gr.com in.net +web.in us.org co.com +aus.basketball +nz.basketball +radio.am +radio.fm c.la certmgr.org -xenapponazure.com +cx.ua discourse.group -virtueeldomein.nl +discourse.team cleverapps.io +clerk.app +clerkstage.app +clickrising.net c66.me cloud66.ws cloud66.zone @@ -3919,7 +3598,11 @@ freesite.host cloudaccess.net cloudcontrolled.com cloudcontrolapp.com -cloudera.site +cf-ipfs.com +cloudflare-ipfs.com +trycloudflare.com +pages.dev +r2.dev workers.dev wnext.app co.ca @@ -3936,12 +3619,18 @@ cloudns.org cloudns.pro cloudns.pw cloudns.us -cloudeity.net cnpy.gdn +codeberg.page co.nl co.no webhosting.be hosting-cluster.nl +ac.ru +edu.ru +gov.ru +int.ru +mil.ru +test.ru dynamisches-dns.de dnsupdater.de internet-dns.de @@ -3952,8 +3641,12 @@ knx-server.net static-access.net realm.cz cupcake.is +curv.dev cyon.link cyon.site +fnwk.site +folionetwork.site +platform0.app daplie.me dattolocal.com dattorelay.com @@ -3966,17 +3659,30 @@ co.dk firm.dk reg.dk store.dk +builtwithdark.com +edgestack.me +ddns5.com debian.net +deno.dev +deno-staging.dev dedyn.io +deta.app +deta.dev +discordsays.com +discordsez.com +jozi.biz dnshome.de online.th shop.th drayddns.com +shoparena.pl dreamhosters.com mydrobo.com drud.io drud.us duckdns.org +bip.sh +bitbridge.net dy.fi tunk.org dyndns-at-home.com @@ -4262,6 +3968,7 @@ myhome-server.de ddnss.org definima.net definima.io +ondigitalocean.app ddnsfree.com ddnsgeek.com giize.com @@ -4281,14 +3988,23 @@ myddns.rocks blogsite.xyz dynv6.net e4.cz +easypanel.app +easypanel.host +elementor.cloud +elementor.cool +en-root.fr mytuleap.com +tuleap-partners.com +encr.app +encoreapi.com onred.one -enonic.io eu.org +eurodir.ru twmail.cc twmail.net twmail.org url.tw +onfabrica.com ru.net adygeya.ru bashkiria.ru @@ -4362,14 +4078,22 @@ vladikavkaz.su vladimir.su vologda.su channelsdvr.net +edgecompute.app +fastly-edge.com fastly-terrarium.com fastlylb.net -fastpanel.direct fastvps-server.com -fhapp.xyz +fastvps.host +myfast.host +fastvps.site +myfast.space fedorainfracloud.org fedorapeople.org +conn.uk +copro.uk +hosp.uk mydobiss.com +fh-muenster.io filegear.me filegear-au.me filegear-de.me @@ -4378,8 +4102,22 @@ filegear-ie.me filegear-jp.me filegear-sg.me firebaseapp.com -flynnhub.com +fireweb.app +flap.id +onflashdrive.app +fldrv.com +fly.dev +edgeapp.net +shw.io flynnhosting.net +forgeblocks.com +framer.app +framercanvas.com +framer.media +framer.photos +framer.website +framer.wiki +0e.vc freebox-os.com freeboxos.com fbx-os.fr @@ -4387,21 +4125,157 @@ fbxos.fr freebox-os.fr freeboxos.fr freedesktop.org +freemyip.com futurehosting.at futuremailing.at +independent-commission.uk +independent-inquest.uk +independent-inquiry.uk +independent-panel.uk +independent-review.uk +public-inquiry.uk +royal-commission.uk usercontent.jp +gentapps.com +gentlentapis.com lab.ms -github.io +cdn-edges.net +ghost.io +gsj.bz githubusercontent.com +githubpreview.dev +github.io gitlab.io +gitapp.si +gitpage.si glitch.me -cloudapps.digital -ro.im +nog.community +co.ro shop.ro +lolipop.io +angry.jp +babyblue.jp +babymilk.jp +backdrop.jp +bambina.jp +bitter.jp +blush.jp +boo.jp +boy.jp +boyfriend.jp +but.jp +candypop.jp +capoo.jp +catfood.jp +cheap.jp +chicappa.jp +chillout.jp +chips.jp +chowder.jp +chu.jp 
+ciao.jp +cocotte.jp +coolblog.jp +cranky.jp +cutegirl.jp +daa.jp +deca.jp +deci.jp +digick.jp +egoism.jp +fakefur.jp +fem.jp +flier.jp +floppy.jp +fool.jp +frenchkiss.jp +girlfriend.jp +girly.jp +gloomy.jp +gonna.jp +greater.jp +hacca.jp +heavy.jp +her.jp +hiho.jp +hippy.jp +holy.jp +hungry.jp +icurus.jp +itigo.jp +jellybean.jp +kikirara.jp +kill.jp +kilo.jp +kuron.jp +littlestar.jp +lolipopmc.jp +lolitapunk.jp +lomo.jp +lovepop.jp +lovesick.jp +main.jp +mods.jp +mond.jp +mongolian.jp +moo.jp +namaste.jp +nikita.jp +nobushi.jp +noor.jp +oops.jp +parallel.jp +parasite.jp +pecori.jp +peewee.jp +penne.jp +pepper.jp +perma.jp +pigboat.jp +pinoko.jp +punyu.jp +pupu.jp +pussycat.jp +pya.jp +raindrop.jp +readymade.jp +sadist.jp +schoolbus.jp +secret.jp +staba.jp +stripper.jp +sub.jp +sunnyday.jp +thick.jp +tonkotsu.jp +under.jp +upper.jp +velvet.jp +verse.jp +versus.jp +vivian.jp +watson.jp +weblike.jp +whitesnow.jp +zombie.jp +heteml.net +cloudapps.digital +pymnt.uk +ro.im goip.de run.app web.app appspot.com +codespot.com +googleapis.com +googlecode.com +pagespeedmobilizer.com +publishproxy.com +withgoogle.com +withyoutube.com +cloud.goog +translate.goog +cloudfunctions.net blogspot.ae blogspot.al blogspot.am @@ -4456,43 +4330,55 @@ blogspot.td blogspot.tw blogspot.ug blogspot.vn -cloudfunctions.net -cloud.goog -codespot.com -googleapis.com -googlecode.com -pagespeedmobilizer.com -publishproxy.com -withgoogle.com -withyoutube.com +goupile.fr +gov.nl +awsmppl.com +günstigbestellen.de +günstigliefern.de fin.ci free.hr caa.li ua.rs conf.se +hs.zone +hs.run hashbang.sh hasura.app hasura-app.io hepforge.org herokuapp.com herokussl.com -myravendb.com +ravendb.cloud ravendb.community ravendb.me development.run ravendb.run -bpl.biz +homesklep.pl +secaas.hk +hoplix.shop orx.biz -ng.city -ng.ink biz.gl col.ng +firm.ng gen.ng ltd.ng +ngo.ng +edu.scot sch.so +ie.ua +hostyhosting.io häkkinen.fi moonscale.net iki.fi +ibxos.it +iliadboxos.it +impertrixcdn.com +impertrix.com +smushcdn.com +wphostedmail.com +wpmucdn.com +tempurl.host +wpmudev.host dyn-berlin.de in-berlin.de in-brb.de @@ -4507,26 +4393,65 @@ biz.at info.at info.cx pixolino.com +na4u.ru +iopsys.se ipifony.net +iservschule.de mein-iserv.de +schulplattform.de +schulserver.de test-iserv.de +iserv.dev iobb.net +mycloud.by +diadem.cloud +jele.cloud +keliweb.cloud +oxa.cloud +primetel.cloud +jele.club +amscompute.com +dopaas.com +kilatiron.com +jele.host +mircloud.host +jele.io +jcloud.kz +cloudjiffy.net +faststacks.net +sdscloud.pl +unicloud.pl +mircloud.ru +enscaled.sg +jele.site +jelastic.team +orangecloud.tn +mircloud.us myjino.ru +jotelulu.cloud js.org kaas.gg khplay.nl +ktistory.com +kapsi.fi keymachine.de kinghost.net uni5.net knightpoint.systems +koobin.events +oya.to +kuleuven.cloud co.krd edu.krd +krellian.net +webthings.io git-repos.de lcube-server.de svn-repos.de leadpages.co lpages.co lpusercontent.com +lelux.site co.business co.education co.events @@ -4534,22 +4459,23 @@ co.financial co.network co.place co.technology -linkitools.space linkyard.cloud linkyard-cloud.ch we.bs +localzone.xyz loginline.app loginline.dev loginline.io loginline.services loginline.site +servers.run +lohmus.me krasnik.pl leczna.pl lubartow.pl lublin.pl poniatowa.pl swidnik.pl -uklugs.org barsy.bg barsycenter.com barsyonline.com @@ -4567,44 +4493,51 @@ barsy.online barsy.org barsy.pro barsy.pub +barsy.ro barsy.shop barsy.site barsy.support barsy.uk mayfirst.info mayfirst.org +cn.vu +mazeplay.com +mcpe.me +mcdir.me +mcdir.ru +mcpre.ru +mediatech.by +mediatech.dev 
+hra.health miniserver.com memset.net +messerli.app meteorapp.com co.pl -azurecontainer.io azurewebsites.net azure-mobile.net cloudapp.net +azurestaticapps.net +csx.cc +mintere.site +forte.id mozilla-iot.org bmoattachments.org net.ru org.ru pp.ru -pony.club -of.fashion -on.fashion -of.football -in.london -of.london -for.men -and.mom -for.mom -for.one -for.sale -of.work -to.work -nctu.me -bitballoon.com -netlify.com +hostedpi.com +netlify.app 4u.com +ngrok.app +ngrok-free.app +ngrok.dev +ngrok-free.dev ngrok.io +ngrok.pizza nfshost.com +noop.app +noticeable.news dnsking.ch mypi.co n4t.co @@ -4716,62 +4649,40 @@ servequake.com sytes.net webhop.me zapto.org -nodum.co -nodum.io pcloud.host nyc.mn -nom.ae -nom.af -nom.ai -nom.al -nym.by -nym.bz -nom.cl -nom.gd -nom.ge -nom.gl -nym.gr -nom.gt -nym.gy -nom.hn -nym.ie -nom.im -nom.ke -nym.kz -nym.la -nym.lc -nom.li -nym.li -nym.lt -nym.lu -nym.me -nom.mk -nym.mn -nym.mx -nom.nu -nym.nz -nym.pe -nym.pt -nom.pw -nom.qa -nym.ro -nom.rs -nom.si -nym.sk -nom.st -nym.su -nym.sx -nom.tj -nym.tw -nom.ug -nom.uy -nom.vc -nom.vg cya.gg +omg.lol cloudycluster.net +omniwe.site +123hjemmeside.dk +123hjemmeside.no +123homepage.it +123kotisivu.fi +123minsida.se +123miweb.es +123paginaweb.pt +123sait.ru +123siteweb.fr +123webseite.at +123webseite.de +123website.be +123website.ch +123website.lu +123website.nl +service.one +simplesite.com +simplesite.gr +simplesite.pl nid.io +opensocial.site opencraft.hosting +orsites.com operaunite.com +tech.orange +authgear-staging.com +authgearapps.com +skygearapp.com outsystemscloud.com ownprovider.com own.pm @@ -4779,6 +4690,11 @@ ox.rs oy.lc pgfog.com pagefrontapp.com +pagexl.com +bar0.net +bar1.net +bar2.net +rdv.to art.pl gliwice.pl krakow.pl @@ -4788,17 +4704,38 @@ zakopane.pl pantheonsite.io gotpantheon.com mypep.link +perspecta.cloud +lk3.ru on-web.fr +platter-app.com +platter-app.dev +platterp.us +pdns.page +plesk.page +pleskns.com dyn53.io +onporter.run co.bn +postman-echo.com +pstmn.io +httpbin.org +prequalifyme.today priv.at prvcy.page protonet.io chirurgiens-dentistes-en-france.fr byen.site +pubtls.org +pythonanywhere.com +qoto.io +qualifioapp.com +ladesk.com +qbuser.com +cloudsite.builders instantcloud.cn ras.ru qa2.com +qcx.io dev-myqnapcloud.com alpha-myqnapcloud.com myqnapcloud.com @@ -4809,16 +4746,65 @@ rackmaze.net readthedocs.io rhcloud.com onrender.com +firewalledreplit.co repl.co repl.run resindevice.io hzc.io wellbeingzone.eu -ptplus.fit +itcouldbewor.se +rocky.page +биз.рус +ком.рус +крым.рус +мир.рус +мск.рус +орг.рус +самара.рус +сочи.рус +спб.рус +я.рус +180r.com +dojin.com +sakuratan.com +sakuraweb.com +x0.com +2-d.jp +bona.jp +crap.jp +daynight.jp +eek.jp +flop.jp +halfmoon.jp +jeez.jp +matrix.jp +mimoza.jp +netgamers.jp +nyanta.jp +o0o0.jp +rdy.jp +rgr.jp +rulez.jp +saloon.jp +sblo.jp +skr.jp +tank.jp +uh-oh.jp +undo.jp +websozai.jp +xii.jp +squares.net +jpn.org +kirara.st +x0.to +from.tv +sakura.tv sandcats.io logoip.de logoip.com +dedibox.fr schokokeks.net +gov.scot scrysec.com firewall-gateway.com firewall-gateway.de @@ -4830,11 +4816,21 @@ firewall-gateway.net my-firewall.org myfirewall.org spdns.org +seidat.net +sellfy.store +senseering.net +minisite.ms +magnet.page biz.ua co.ua pp.ua +shiftcrypto.dev +shiftcrypto.io shiftedit.io -myshopcolumnss.com +myshopblocks.com +myshopify.com +shopitsite.com +shopware.store mo-siemens.io 1kapp.com appchizi.com @@ -4843,41 +4839,78 @@ sinaapp.com vipsinaapp.com siteleaf.net bounty-full.com +small-web.org +vp4.me +snowflake.app +streamlit.app 
+streamlitapp.com +try-snowplow.com +srht.site stackhero-network.com +musician.io +novecore.site static.land +storebase.store +vps-host.net +playstation-cloud.com spacekit.io +myspreadshop.at +myspreadshop.be +myspreadshop.ca +myspreadshop.ch +myspreadshop.com +myspreadshop.de +myspreadshop.dk +myspreadshop.es +myspreadshop.fi +myspreadshop.fr +myspreadshop.ie +myspreadshop.it +myspreadshop.net +myspreadshop.nl +myspreadshop.no +myspreadshop.pl +myspreadshop.se +storipress.app storj.farm utwente.io temp-dns.com -applicationcloud.io -scapp.io +supabase.co +supabase.in +supabase.net syncloud.it -diskstation.me dscloud.biz -dscloud.me -dscloud.mobi dsmynas.com -dsmynas.net -dsmynas.org familyds.com -familyds.net -familyds.org +diskstation.me +dscloud.me i234.me myds.me synology.me +dscloud.mobi +dsmynas.net +familyds.net +dsmynas.org +familyds.org vpnplus.to +mytabit.com taifun-dns.de +ts.net gda.pl gdansk.pl gdynia.pl med.pl sopot.pl -edugit.org +edugit.io telebit.app telebit.io +reservd.com thingdustdata.com +tickets.io arvo.network azimuth.network +tlon.network +torproject.net bloxcms.com townnews-staging.com 12hp.at @@ -4913,28 +4946,99 @@ my-wan.de syno-ds.de synology-diskstation.de synology-ds.de +typedream.app uber.space hk.com hk.org ltd.hk inc.hk +it.com +name.pm +sch.tf +biz.wf +sch.wf +org.yt virtualuser.de virtual-user.de +upli.io +urown.cloud +dnsupdate.info 2038.io +vercel.app +vercel.dev +now.sh router.management v-info.info voorloper.cloud +neko.am +nyaa.am +be.ax +cat.ax +es.ax +eu.ax +gg.ax +mc.ax +us.ax +xy.ax +nl.ci +xx.gl +app.gp +blog.gt +de.gt +to.gt +be.gy +cc.hn +blog.kg +io.kg +jp.kg +tv.kg +uk.kg +us.kg +de.ls +at.md +de.md +jp.md +to.md +indie.porn +vxl.sh +ch.tc +me.tc +we.tc +nyan.to +at.vg +blog.vu +dev.vu +me.vu +v.ua wafflecell.com +reserve-online.net +reserve-online.com +bookonline.app +hotelwithflight.com wedeploy.io wedeploy.me wedeploy.sh remotewd.com wmflabs.org +toolforge.org +wmcloud.org +panel.gg +messwithdns.com +woltlab-demo.com +myforum.community +community-pro.de +diskussionsbereich.de +community-pro.net +meinforum.net +wpenginepowered.com +wixsite.com +editorx.io half.host xnbay.com cistron.nl demon.nl xs4all.space +yandexcloud.net official.academy yolasite.com ybo.faith @@ -4944,14 +5048,13 @@ ybo.party ybo.review ybo.science ybo.trade +ynh.fr nohost.me noho.st za.net za.org -now.sh bss.design basicserver.io virtualserver.io enterprisecloud.nu -zone.id %% diff --git a/src/Functions/URL/tldLookup.sh b/src/Functions/URL/tldLookup.sh index a7893c3a168..7f9848411fc 100755 --- a/src/Functions/URL/tldLookup.sh +++ b/src/Functions/URL/tldLookup.sh @@ -3,8 +3,8 @@ [ ! 
-f public_suffix_list.dat ] && wget -nv -O public_suffix_list.dat https://publicsuffix.org/list/public_suffix_list.dat echo '%language=C++ -%define lookup-function-name is_valid -%define class-name tldLookupHash +%define lookup-function-name isValid +%define class-name TopLevelDomainLookupHash %readonly-tables %includes %compare-strncmp diff --git a/src/Functions/UTCTimestampTransform.cpp b/src/Functions/UTCTimestampTransform.cpp new file mode 100644 index 00000000000..ff3c9c27ffc --- /dev/null +++ b/src/Functions/UTCTimestampTransform.cpp @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + template + class UTCTimestampTransform : public IFunction + { + public: + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name); + WhichDataType which_type_first(arguments[0]); + WhichDataType which_type_second(arguments[1]); + if (!which_type_first.isDateTime() && !which_type_first.isDateTime64()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument type must be datetime.", name); + if (dynamic_cast(arguments[0].get())->hasExplicitTimeZone()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument should not have explicit time zone.", name); + if (!which_type_second.isString()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 2nd argument type must be string.", name); + DataTypePtr date_time_type; + if (which_type_first.isDateTime()) + date_time_type = std::make_shared(); + else + { + const DataTypeDateTime64 * date_time_64 = static_cast(arguments[0].get()); + date_time_type = std::make_shared(date_time_64->getScale()); + } + return date_time_type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override + { + if (arguments.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name); + ColumnWithTypeAndName arg1 = arguments[0]; + ColumnWithTypeAndName arg2 = arguments[1]; + const auto * time_zone_const_col = checkAndGetColumnConstData(arg2.column.get()); + if (!time_zone_const_col) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 2nd argument of function {}. 
Expected const(String).", arg2.column->getName(), name); + String time_zone_val = time_zone_const_col->getDataAt(0).toString(); + auto column = result_type->createColumn(); + if (WhichDataType(arg1.type).isDateTime()) + { + const auto * date_time_col = checkAndGetColumn(arg1.column.get()); + for (size_t i = 0; i < date_time_col->size(); ++i) + { + UInt32 date_time_val = date_time_col->getElement(i); + LocalDateTime date_time(date_time_val, Name::to ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val)); + time_t time_val = date_time.to_time_t(Name::from ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val)); + column->insert(time_val); + } + } + else if (WhichDataType(arg1.type).isDateTime64()) + { + const auto * date_time_col = checkAndGetColumn(arg1.column.get()); + const DataTypeDateTime64 * date_time_type = static_cast(arg1.type.get()); + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(date_time_type->getScale()); + for (size_t i = 0; i < date_time_col->size(); ++i) + { + DateTime64 date_time_val = date_time_col->getElement(i); + Int64 seconds = date_time_val.value / scale_multiplier; + Int64 micros = date_time_val.value % scale_multiplier; + LocalDateTime date_time(seconds, Name::to ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val)); + time_t time_val = date_time.to_time_t(Name::from ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val)); + DateTime64 date_time_64(time_val * scale_multiplier + micros); + column->insert(date_time_64); + } + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument can only be datetime/datetime64.", name); + return column; + } + + }; + + struct NameToUTCTimestamp + { + static constexpr auto name = "toUTCTimestamp"; + static constexpr auto from = false; + static constexpr auto to = true; + }; + + struct NameFromUTCTimestamp + { + static constexpr auto name = "fromUTCTimestamp"; + static constexpr auto from = true; + static constexpr auto to = false; + }; + + using ToUTCTimestampFunction = UTCTimestampTransform; + using FromUTCTimestampFunction = UTCTimestampTransform; +} + +REGISTER_FUNCTION(UTCTimestampTransform) +{ + factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("to_utc_timestamp", NameToUTCTimestamp::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("from_utc_timestamp", NameFromUTCTimestamp::name, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index d4ecbf66987..ca142479ff1 100644 --- a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -171,6 +171,9 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create size_t command_termination_timeout_seconds = config.getUInt64(key_in_config + ".command_termination_timeout", 10); size_t command_read_timeout_milliseconds = config.getUInt64(key_in_config + ".command_read_timeout", 10000); size_t command_write_timeout_milliseconds = config.getUInt64(key_in_config + ".command_write_timeout", 10000); + ExternalCommandStderrReaction stderr_reaction + = parseExternalCommandStderrReaction(config.getString(key_in_config + ".stderr_reaction", "none")); + bool check_exit_code = config.getBool(key_in_config + ".check_exit_code", true); size_t pool_size = 0; size_t max_command_execution_time = 0; @@ -238,6
+241,8 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create .command_termination_timeout_seconds = command_termination_timeout_seconds, .command_read_timeout_milliseconds = command_read_timeout_milliseconds, .command_write_timeout_milliseconds = command_write_timeout_milliseconds, + .stderr_reaction = stderr_reaction, + .check_exit_code = check_exit_code, .pool_size = pool_size, .max_command_execution_time_seconds = max_command_execution_time, .is_executable_pool = is_executable_pool, diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h index 45196759d3b..a7d586061b2 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h @@ -15,7 +15,7 @@ class BackupEntriesCollector; class RestorerFromBackup; /// Factory for SQLUserDefinedFunctions -class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> +class UserDefinedSQLFunctionFactory : public IHints<> { public: static UserDefinedSQLFunctionFactory & instance(); diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp index 76ebcf769d5..29aff666da5 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp @@ -223,7 +223,7 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject( { auto code = zookeeper->tryCreate(path, create_statement, zkutil::CreateMode::Persistent); if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) - throw zkutil::KeeperException(code, path); + throw zkutil::KeeperException::fromPath(code, path); if (code == Coordination::Error::ZNODEEXISTS) { @@ -234,14 +234,14 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject( code = zookeeper->trySet(path, create_statement); if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNONODE)) - throw zkutil::KeeperException(code, path); + throw zkutil::KeeperException::fromPath(code, path); } if (code == Coordination::Error::ZOK) break; if (!--num_attempts) - throw zkutil::KeeperException(code, path); + throw zkutil::KeeperException::fromPath(code, path); } LOG_DEBUG(log, "Object {} stored", backQuote(object_name)); @@ -262,7 +262,7 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::removeObject( auto code = zookeeper->tryRemove(path); if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNONODE)) - throw zkutil::KeeperException(code, path); + throw zkutil::KeeperException::fromPath(code, path); if (code == Coordination::Error::ZNONODE) { diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 4d2b8175f5b..499fe4ce7b2 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -1,7 +1,9 @@ -#include #include +#include +#include +#include +#include #include -#include "arrayScalarProduct.h" namespace DB @@ -10,6 +12,8 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; } @@ -70,44 +74,32 @@ namespace ErrorCodes * The "curve" will be present by a line that moves one step either towards right or top on each threshold change. 
*/ - -struct NameArrayAUC -{ - static constexpr auto name = "arrayAUC"; -}; - - -class ArrayAUCImpl +class FunctionArrayAUC : public IFunction { public: - using ResultType = Float64; + static constexpr auto name = "arrayAUC"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static DataTypePtr getReturnType(const DataTypePtr & /* score_type */, const DataTypePtr & label_type) - { - if (!(isNumber(label_type) || isEnum(label_type))) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} label must have numeric type.", std::string(NameArrayAUC::name)); - - return std::make_shared>(); - } - - template - static ResultType apply( - const T * scores, - const U * labels, - size_t size) +private: + static Float64 apply( + const IColumn & scores, + const IColumn & labels, + ColumnArray::Offset current_offset, + ColumnArray::Offset next_offset) { struct ScoreLabel { - T score; + Float64 score; bool label; }; + size_t size = next_offset - current_offset; PODArrayWithStackMemory sorted_labels(size); for (size_t i = 0; i < size; ++i) { - bool label = labels[i] > 0; - sorted_labels[i].score = scores[i]; + bool label = labels.getFloat64(current_offset + i) > 0; + sorted_labels[i].score = scores.getFloat64(current_offset + i); sorted_labels[i].label = label; } @@ -129,18 +121,85 @@ public: /// Then divide the area to the area of rectangle. if (count_positive == 0 || count_positive == size) - return std::numeric_limits::quiet_NaN(); + return std::numeric_limits::quiet_NaN(); - return static_cast(area) / count_positive / (size - count_positive); + return static_cast(area) / count_positive / (size - count_positive); + } + + static void vector( + const IColumn & scores, + const IColumn & labels, + const ColumnArray::Offsets & offsets, + PaddedPODArray & result) + { + size_t size = offsets.size(); + result.resize(size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + auto next_offset = offsets[i]; + result[i] = apply(scores, labels, current_offset, next_offset); + current_offset = next_offset; + } + } + +public: + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be an array.", getName()); + + const auto & nested_type = array_type->getNestedType(); + if (!isNativeNumber(nested_type) && !isEnum(nested_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", + getName(), nested_type->getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst(); + ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); + + const ColumnArray * col_array1 = checkAndGetColumn(col1.get()); + if (!col_array1) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + + const ColumnArray * col_array2 = checkAndGetColumn(col2.get()); + if (!col_array2) + throw 
Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of second argument of function {}", arguments[1].column->getName(), getName()); + + if (!col_array1->hasEqualOffsets(*col_array2)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); + + auto col_res = ColumnVector::create(); + + vector( + col_array1->getData(), + col_array2->getData(), + col_array1->getOffsets(), + col_res->getData()); + + return col_res; } }; -/// auc(array_score, array_label) - Calculate AUC with array of score and label -using FunctionArrayAUC = FunctionArrayScalarProduct; - REGISTER_FUNCTION(ArrayAUC) { factory.registerFunction(); } + } diff --git a/src/Functions/array/arrayDistinct.cpp b/src/Functions/array/arrayDistinct.cpp index 527624794ea..ea331d6bdad 100644 --- a/src/Functions/array/arrayDistinct.cpp +++ b/src/Functions/array/arrayDistinct.cpp @@ -268,10 +268,9 @@ void FunctionArrayDistinct::executeHashed( if (nullable_col && (*src_null_map)[j]) continue; - UInt128 hash; SipHash hash_function; src_data.updateHashWithValue(j, hash_function); - hash_function.get128(hash); + const auto hash = hash_function.get128(); if (!set.find(hash)) { diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 0733f1e2d43..1a920260906 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -13,7 +13,6 @@ #include #include -// for better debug: #include /** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths. * This is very unusual function made as a special order for our dear customer - Metrica web analytics system. @@ -134,18 +133,14 @@ private: /// Hash a set of keys into a UInt128 value. static inline UInt128 ALWAYS_INLINE hash128depths(const std::vector & indices, const ColumnRawPtrs & key_columns) { - UInt128 key; SipHash hash; - for (size_t j = 0, keys_size = key_columns.size(); j < keys_size; ++j) { // Debug: const auto & field = (*key_columns[j])[indices[j]]; DUMP(j, indices[j], field); key_columns[j]->updateHashWithValue(indices[j], hash); } - hash.get128(key); - - return key; + return hash.get128(); } diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index d1bbd169513..ee84e3138e8 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -510,11 +510,12 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable map.clear(); bool all_has_nullable = all_nullable; + bool current_has_nullable = false; for (size_t arg_num = 0; arg_num < args; ++arg_num) { const auto & arg = arrays.args[arg_num]; - bool current_has_nullable = false; + current_has_nullable = false; size_t off; // const array has only one row @@ -549,44 +550,93 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable } } - prev_off[arg_num] = off; - if (arg.is_const) - prev_off[arg_num] = 0; - + // We update offsets for all the arrays except the first one. Offsets for the first array would be updated later. + // It is needed to iterate the first array again so that the elements in the result would have fixed order. 
+ if (arg_num) + { + prev_off[arg_num] = off; + if (arg.is_const) + prev_off[arg_num] = 0; + } if (!current_has_nullable) all_has_nullable = false; } - if (all_has_nullable) - { - ++result_offset; - result_data.insertDefault(); - null_map.push_back(1); - } + // We have NULL in output only once if it should be there + bool null_added = false; + const auto & arg = arrays.args[0]; + size_t off; + // const array has only one row + if (arg.is_const) + off = (*arg.offsets)[0]; + else + off = (*arg.offsets)[row]; - for (const auto & pair : map) + for (auto i : collections::range(prev_off[0], off)) { - if (pair.getMapped() == args) + all_has_nullable = all_nullable; + typename Map::LookupResult pair = nullptr; + + if (arg.null_map && (*arg.null_map)[i]) { + current_has_nullable = true; + if (all_has_nullable && !null_added) + { + ++result_offset; + result_data.insertDefault(); + null_map.push_back(1); + null_added = true; + } + if (null_added) + continue; + } + else if constexpr (is_numeric_column) + { + pair = map.find(columns[0]->getElement(i)); + } + else if constexpr (std::is_same_v || std::is_same_v) + pair = map.find(columns[0]->getDataAt(i)); + else + { + const char * data = nullptr; + pair = map.find(columns[0]->serializeValueIntoArena(i, arena, data)); + } + prev_off[0] = off; + if (arg.is_const) + prev_off[0] = 0; + + if (!current_has_nullable) + all_has_nullable = false; + + if (pair && pair->getMapped() == args) + { + // We increase pair->getMapped() here to not skip duplicate values from the first array. + ++pair->getMapped(); ++result_offset; if constexpr (is_numeric_column) - result_data.insertValue(pair.getKey()); + { + result_data.insertValue(pair->getKey()); + } else if constexpr (std::is_same_v || std::is_same_v) - result_data.insertData(pair.getKey().data, pair.getKey().size); + { + result_data.insertData(pair->getKey().data, pair->getKey().size); + } else - result_data.deserializeAndInsertFromArena(pair.getKey().data); - + { + result_data.deserializeAndInsertFromArena(pair->getKey().data); + } if (all_nullable) null_map.push_back(0); } } result_offsets.getElement(row) = result_offset; - } + } ColumnPtr result_column = std::move(result_data_ptr); if (all_nullable) result_column = ColumnNullable::create(result_column, std::move(null_map_column)); return ColumnArray::create(result_column, std::move(result_offsets_ptr)); + } diff --git a/src/Functions/array/arrayShiftRotate.cpp b/src/Functions/array/arrayShiftRotate.cpp new file mode 100644 index 00000000000..cea2917af37 --- /dev/null +++ b/src/Functions/array/arrayShiftRotate.cpp @@ -0,0 +1,399 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; +} + +enum class ShiftRotateStrategy : uint8_t +{ + Shift, + Rotate +}; + +enum class ShiftRotateDirection : uint8_t +{ + Left, + Right +}; + +template +class FunctionArrayShiftRotate : public IFunction +{ +public: + static constexpr auto name = Name::name; + static constexpr ShiftRotateStrategy strategy = Impl::strategy; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return strategy == ShiftRotateStrategy::Shift; } + size_t getNumberOfArguments() const override { return strategy == ShiftRotateStrategy::Rotate ? 
2 : 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if constexpr (strategy == ShiftRotateStrategy::Shift) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments.", getName()); + + if (arguments.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at most three arguments.", getName()); + } + + const DataTypePtr & first_arg = arguments[0]; + if (!isArray(first_arg)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, expected Array", + arguments[0]->getName(), + getName()); + + if (!isNativeInteger(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, expected Native Integer", + arguments[1]->getName(), + getName()); + + const DataTypePtr & elem_type = static_cast(*first_arg).getNestedType(); + if (arguments.size() == 3) + { + auto ret = tryGetLeastSupertype(DataTypes{elem_type, arguments[2]}); + // Note that this will fail if the default value does not fit into the array element type (e.g. UInt64 and Array(UInt8)). + // In this case array should be converted to Array(UInt64) explicitly. + if (!ret || !ret->equals(*elem_type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, expected {}", + arguments[2]->getName(), + getName(), + elem_type->getName()); + } + + return std::make_shared(elem_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + ColumnPtr column_array_ptr = arguments[0].column; + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + + if (!column_array) + { + const auto * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + if (!column_const_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected Array column, found {}", column_array_ptr->getName()); + + column_array_ptr = column_const_array->convertToFullColumn(); + column_array = assert_cast(column_array_ptr.get()); + } + + ColumnPtr shift_num_column = arguments[1].column; + + if constexpr (strategy == ShiftRotateStrategy::Shift) + { + ColumnPtr default_column; + const auto elem_type = static_cast(*result_type).getNestedType(); + + if (arguments.size() == 3) + default_column = castColumn(arguments[2], elem_type); + else + default_column = elem_type->createColumnConstWithDefaultValue(input_rows_count); + + default_column = default_column->convertToFullColumnIfConst(); + + return Impl::execute(*column_array, shift_num_column, default_column, input_rows_count); + } + else + { + return Impl::execute(*column_array, shift_num_column, input_rows_count); + } + } +}; + +template +struct ArrayRotateImpl +{ + static constexpr ShiftRotateStrategy strategy = ShiftRotateStrategy::Rotate; + static ColumnPtr execute(const ColumnArray & array, ColumnPtr shift_num_column, size_t input_rows_count) + { + size_t batch_size = array.getData().size(); + + IColumn::Permutation permutation(batch_size); + const IColumn::Offsets & offsets = array.getOffsets(); + + IColumn::Offset current_offset = 0; + for (size_t i = 0; i < input_rows_count; ++i) + { + const size_t offset = offsets[i]; + const size_t nested_size = offset - current_offset; 
+ Int64 shift_num_value = shift_num_column->getInt(i); + + // Rotating left to -N is the same as rotating right to N. + ShiftRotateDirection actual_direction = direction; + if (shift_num_value < 0) + { + if (shift_num_value == std::numeric_limits::min()) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Shift number {} is out of range", shift_num_value); + actual_direction = (direction == ShiftRotateDirection::Left) ? ShiftRotateDirection::Right : ShiftRotateDirection::Left; + shift_num_value = -shift_num_value; + } + + size_t shift_num = static_cast(shift_num_value); + if (nested_size > 0 && shift_num >= nested_size) + shift_num %= nested_size; + + // Rotating left to N is the same as shifting right to (size - N). + if (actual_direction == ShiftRotateDirection::Right) + shift_num = nested_size - shift_num; + + for (size_t j = 0; j < nested_size; ++j) + permutation[current_offset + j] = current_offset + (j + shift_num) % nested_size; + + current_offset = offset; + } + + return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr()); + } +}; + +template +struct ArrayShiftImpl +{ + static constexpr ShiftRotateStrategy strategy = ShiftRotateStrategy::Shift; + + static ColumnPtr + execute(const ColumnArray & array, ColumnPtr shift_column, ColumnPtr default_column, size_t input_column_rows) + { + const IColumn::Offsets & offsets = array.getOffsets(); + const IColumn & array_data = array.getData(); + const size_t data_size = array_data.size(); + + auto result_column = array.getData().cloneEmpty(); + result_column->reserve(data_size); + + IColumn::Offset current_offset = 0; + for (size_t i = 0; i < input_column_rows; ++i) + { + const size_t offset = offsets[i]; + const size_t nested_size = offset - current_offset; + Int64 shift_num_value = shift_column->getInt(i); + + // Shifting left to -N is the same as shifting right to N. + ShiftRotateDirection actual_direction = direction; + if (shift_num_value < 0) + { + if (shift_num_value == std::numeric_limits::min()) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Shift number {} is out of range", shift_num_value); + actual_direction = (direction == ShiftRotateDirection::Left) ? 
ShiftRotateDirection::Right : ShiftRotateDirection::Left; + shift_num_value = -shift_num_value; + } + + const size_t number_of_default_values = std::min(static_cast(shift_num_value), nested_size); + const size_t num_of_original_values = nested_size - number_of_default_values; + + if (actual_direction == ShiftRotateDirection::Right) + { + result_column->insertManyFrom(*default_column, i, number_of_default_values); + result_column->insertRangeFrom(array_data, current_offset, num_of_original_values); + } + else + { + result_column->insertRangeFrom(array_data, current_offset + number_of_default_values, num_of_original_values); + result_column->insertManyFrom(*default_column, i, number_of_default_values); + } + + current_offset = offset; + } + + return ColumnArray::create(std::move(result_column), array.getOffsetsPtr()); + } +}; + +struct NameArrayShiftLeft +{ + static constexpr auto name = "arrayShiftLeft"; +}; + +struct NameArrayShiftRight +{ + static constexpr auto name = "arrayShiftRight"; +}; + +struct NameArrayRotateLeft +{ + static constexpr auto name = "arrayRotateLeft"; +}; + +struct NameArrayRotateRight +{ + static constexpr auto name = "arrayRotateRight"; +}; + +using ArrayShiftLeftImpl = ArrayShiftImpl; +using FunctionArrayShiftLeft = FunctionArrayShiftRotate; + +using ArrayShiftRightImpl = ArrayShiftImpl; +using FunctionArrayShiftRight = FunctionArrayShiftRotate; + +using ArrayRotateLeftImpl = ArrayRotateImpl; +using FunctionArrayRotateLeft = FunctionArrayShiftRotate; + +using ArrayRotateRightImpl = ArrayRotateImpl; +using FunctionArrayRotateRight = FunctionArrayShiftRotate; + + +REGISTER_FUNCTION(ArrayShiftOrRotate) +{ + factory.registerFunction( + FunctionDocumentation{ + .description = R"( +Returns an array of the same size as the original array with elements rotated +to the left by the specified number of positions. +[example:simple_int] +[example:overflow_int] +[example:simple_string] +[example:simple_array] +[example:simple_nested_array] + +Negative rotate values are treated as rotating to the right by the absolute +value of the rotation. +[example:negative_rotation_int] +)", + .examples{ + {"simple_int", "SELECT arrayRotateLeft([1, 2, 3, 4, 5], 3)", "[4, 5, 1, 2, 3]"}, + {"simple_string", "SELECT arrayRotateLeft(['a', 'b', 'c', 'd', 'e'], 3)", "['d', 'e', 'a', 'b', 'c']"}, + {"simple_array", "SELECT arrayRotateLeft([[1, 2], [3, 4], [5, 6]], 2)", "[[5, 6], [1, 2], [3, 4]]"}, + {"simple_nested_array", + "SELECT arrayRotateLeft([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)", + "[[[5, 6], [7, 8]], [[1, 2], [3, 4]]]"}, + {"negative_rotation_int", "SELECT arrayRotateLeft([1, 2, 3, 4, 5], -3)", "[3, 4, 5, 1, 2]"}, + {"overflow_int", "SELECT arrayRotateLeft([1, 2, 3, 4, 5], 8)", "[4, 5, 1, 2, 3]"}, + + }, + .categories = {"Array"}, + }); + factory.registerFunction( + FunctionDocumentation{ + .description = R"( +Returns an array of the same size as the original array with elements rotated +to the right by the specified number of positions. +[example:simple_int] +[example:overflow_int] +[example:simple_string] +[example:simple_array] +[example:simple_nested_array] + +Negative rotate values are treated as rotating to the left by the absolute +value of the rotation. 
+[example:negative_rotation_int] +)", + .examples{ + {"simple_int", "SELECT arrayRotateRight([1, 2, 3, 4, 5], 3)", "[3, 4, 5, 1, 2]"}, + {"simple_string", "SELECT arrayRotateRight(['a', 'b', 'c', 'd', 'e'], 3)", "['c', 'd', 'e', 'a', 'b']"}, + {"simple_array", "SELECT arrayRotateRight([[1, 2], [3, 4], [5, 6]], 2)", "[[3, 4], [5, 6], [1, 2]]"}, + {"simple_nested_array", + "SELECT arrayRotateRight([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)", + "[[[7, 8], [1, 2]], [[3, 4], [5, 6]]]"}, + {"negative_rotation_int", "SELECT arrayRotateRight([1, 2, 3, 4, 5], -3)", "[4, 5, 1, 2, 3]"}, + {"overflow_int", "SELECT arrayRotateRight([1, 2, 3, 4, 5], 8)", "[4, 5, 1, 2, 3]"}, + }, + .categories = {"Array"}, + }); + factory.registerFunction( + FunctionDocumentation{ + .description = R"( +Returns an array of the same size as the original array with elements shifted +to the left by the specified number of positions. New elements are filled with +provided default values or default values of the corresponding type. +[example:simple_int] +[example:overflow_int] +[example:simple_string] +[example:simple_array] +[example:simple_nested_array] + +Negative shift values are treated as shifting to the right by the absolute +value of the shift. +[example:negative_shift_int] + +The default value must be of the same type as the array elements. +[example:simple_int_with_default] +[example:simple_string_with_default] +[example:simple_array_with_default] +[example:casted_array_with_default] +)", + .examples{ + {"simple_int", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], 3)", "[4, 5, 0, 0, 0]"}, + {"negative_shift_int", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], -3)", "[0, 0, 0, 1, 2]"}, + {"overflow_int", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], 8)", "[0, 0, 0, 0, 0]"}, + {"simple_string", "SELECT arrayShiftLeft(['a', 'b', 'c', 'd', 'e'], 3)", "['d', 'e', '', '', '']"}, + {"simple_array", "SELECT arrayShiftLeft([[1, 2], [3, 4], [5, 6]], 2)", "[[5, 6], [], []]"}, + {"simple_nested_array", "SELECT arrayShiftLeft([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)", "[[[5, 6], [7, 8]], []]"}, + {"simple_int_with_default", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], 3, 7)", "[4, 5, 7, 7, 7]"}, + {"simple_string_with_default", "SELECT arrayShiftLeft(['a', 'b', 'c', 'd', 'e'], 3, 'foo')", "['d', 'e', 'foo', 'foo', 'foo']"}, + {"simple_array_with_default", "SELECT arrayShiftLeft([[1, 2], [3, 4], [5, 6]], 2, [7, 8])", "[[5, 6], [7, 8], [7, 8]]"}, + {"casted_array_with_default", + "SELECT arrayShiftLeft(CAST('[1, 2, 3, 4, 5, 6]', 'Array(UInt16)'), 1, 1000)", + "[2, 3, 4, 5, 6, 1000]"}, + }, + .categories = {"Array"}, + }); + factory.registerFunction( + FunctionDocumentation{ + .description = R"( +Returns an array of the same size as the original array with elements shifted +to the right by the specified number of positions. New elements are filled with +provided default values or default values of the corresponding type. +[example:simple_int] +[example:overflow_int] +[example:simple_string] +[example:simple_array] +[example:simple_nested_array] + +Negative shift values are treated as shifting to the left by the absolute +value of the shift. +[example:negative_shift_int] + +The default value must be of the same type as the array elements. 
+[example:simple_int_with_default] +[example:simple_string_with_default] +[example:simple_array_with_default] +[example:casted_array_with_default] +)", + .examples{ + {"simple_int", "SELECT arrayShiftRight([1, 2, 3, 4, 5], 3)", "[0, 0, 0, 1, 2]"}, + {"negative_shift_int", "SELECT arrayShiftRight([1, 2, 3, 4, 5], -3)", "[4, 5, 0, 0, 0]"}, + {"overflow_int", "SELECT arrayShiftRight([1, 2, 3, 4, 5], 8)", "[0, 0, 0, 0, 0]"}, + {"simple_string", "SELECT arrayShiftRight(['a', 'b', 'c', 'd', 'e'], 3)", "['', '', '', 'a', 'b']"}, + {"simple_array", "SELECT arrayShiftRight([[1, 2], [3, 4], [5, 6]], 2)", "[[], [], [1, 2]]"}, + {"simple_nested_array", "SELECT arrayShiftRight([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)", "[[], [[1, 2], [3, 4]]]"}, + {"simple_int_with_default", "SELECT arrayShiftRight([1, 2, 3, 4, 5], 3, 7)", "[7, 7, 7, 1, 2]"}, + {"simple_string_with_default", + "SELECT arrayShiftRight(['a', 'b', 'c', 'd', 'e'], 3, 'foo')", + "['foo', 'foo', 'foo', 'a', 'b']"}, + {"simple_array_with_default", "SELECT arrayShiftRight([[1, 2], [3, 4], [5, 6]], 2, [7, 8])", "[[7, 8], [7, 8], [1, 2]]"}, + {"casted_array_with_default", + "SELECT arrayShiftRight(CAST('[1, 2, 3, 4, 5, 6]', 'Array(UInt16)'), 1, 1000)", + "[1000, 1, 2, 3, 4, 5]"}, + }, + .categories = {"Array"}, + }); +} + +} diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 7c20e195098..faa5ae47b29 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -16,6 +16,7 @@ #include #include + namespace DB { diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 45732d8957c..0db71ab2cf8 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -102,17 +102,13 @@ private: if (key_argument_data_type.isArray()) { - DataTypePtr value_type; - if (1 < arguments.size()) - value_type = arguments[1]; - - if (arguments.size() < 2 || (value_type && !isArray(value_type))) + if (arguments.size() < 2 || !arguments[1] || !isArray(arguments[1])) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} if array argument is passed as key, additional array argument as value must be passed", getName()); const auto & key_array_type = assert_cast(*arguments[0]); - const auto & value_array_type = assert_cast(*value_type); + const auto & value_array_type = assert_cast(*arguments[1]); key_argument_series_type = key_array_type.getNestedType(); value_argument_series_type = value_array_type.getNestedType(); diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp index 4060aafe1a3..5f7a3406c62 100644 --- a/src/Functions/base64Decode.cpp +++ b/src/Functions/base64Decode.cpp @@ -7,10 +7,9 @@ namespace DB { REGISTER_FUNCTION(Base64Decode) { - tb64ini(0, 0); factory.registerFunction>(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index 773db7e09d9..69268f5a25d 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -7,10 +7,9 @@ namespace DB { REGISTER_FUNCTION(Base64Encode) { - tb64ini(0, 0); factory.registerFunction>(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. 
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/castTypeToEither.h b/src/Functions/castTypeToEither.h index f5ee9779451..aa8330366f1 100644 --- a/src/Functions/castTypeToEither.h +++ b/src/Functions/castTypeToEither.h @@ -8,18 +8,12 @@ namespace DB template static bool castTypeToEither(const T * type, F && f) { - /// XXX can't use && here because gcc-7 complains about parentheses around && within || - return ((typeid_cast(type) ? f(*typeid_cast(type)) : false) || ...); + return ((typeid_cast(type) && f(*typeid_cast(type))) || ...); } template constexpr bool castTypeToEither(TypeList, const auto * type, auto && f) { - return ( - (typeid_cast(type) != nullptr - ? std::forward(f)( - *typeid_cast(type)) - : false) - || ...); + return ((typeid_cast(type) != nullptr && std::forward(f)(*typeid_cast(type))) || ...); } } diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 8288d872f18..9eb222d8c09 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -208,6 +208,10 @@ public: { return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments); } + else if (isTuple(arguments.at(0).type)) + { + return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments); + } else return std::make_unique( FunctionConcat::create(context), collections::map(arguments, [](const auto & elem) { return elem.type; }), return_type); diff --git a/src/Functions/currentDatabase.cpp b/src/Functions/currentDatabase.cpp index b1a3cbf5856..954899c3c2b 100644 --- a/src/Functions/currentDatabase.cpp +++ b/src/Functions/currentDatabase.cpp @@ -54,7 +54,9 @@ public: REGISTER_FUNCTION(CurrentDatabase) { factory.registerFunction(); - factory.registerAlias("DATABASE", "currentDatabase", FunctionFactory::CaseInsensitive); + factory.registerAlias("DATABASE", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("SCHEMA", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("current_database", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/currentSchemas.cpp b/src/Functions/currentSchemas.cpp new file mode 100644 index 00000000000..322e719eb17 --- /dev/null +++ b/src/Functions/currentSchemas.cpp @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +class FunctionCurrentSchemas : public IFunction +{ + const String db_name; + +public: + static constexpr auto name = "currentSchemas"; + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context->getCurrentDatabase()); + } + + explicit FunctionCurrentSchemas(const String & db_name_) : + db_name{db_name_} + { + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 1; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + // For compatibility, function implements the same signature as Postgres' + const bool argument_is_valid = arguments.size() == 1 && isBool(arguments.front()); + if (!argument_is_valid) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be bool", getName()); + + return std::make_shared(std::make_shared()); + } + + bool isDeterministic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const 
DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + return DataTypeArray(std::make_shared()) + .createColumnConst(input_rows_count, Array { db_name }); + } +}; + +} + +REGISTER_FUNCTION(CurrentSchema) +{ + factory.registerFunction(FunctionDocumentation + { + .description=R"( +Returns a single-element array with the name of the current database + +Requires a boolean parameter, but it is ignored actually. It is required just for compatibility with the implementation of this function in other DB engines. + +[example:common] +)", + .examples{ + {"common", "SELECT current_schemas(true);", "['default']"} + } + }, + FunctionFactory::CaseInsensitive); + factory.registerAlias("current_schemas", FunctionCurrentSchemas::name, FunctionFactory::CaseInsensitive); + +} + +} diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 6bfbbb7c735..c9c9020f068 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -381,25 +381,25 @@ public: const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1); const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); - if (unit == "year" || unit == "yy" || unit == "yyyy") + if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "quarter" || unit == "qq" || unit == "q") + else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "month" || unit == "mm" || unit == "m") + else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "week" || unit == "wk" || unit == "ww") + else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "day" || unit == "dd" || unit == "d") + else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "hour" || unit == "hh" || unit == "h") + else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "minute" || unit == "mi" || unit == "n") + else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "second" || unit == "ss" || unit == "s") + else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "millisecond" || unit == "ms") + else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "microsecond" || unit == "us" || unit == "u") + else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git 
a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp new file mode 100644 index 00000000000..2cd95127266 --- /dev/null +++ b/src/Functions/decodeHTMLComponent.cpp @@ -0,0 +1,229 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + struct DecodeHTMLComponentName + { + static constexpr auto name = "decodeHTMLComponent"; + }; + + class FunctionDecodeHTMLComponentImpl + { + public: + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + /// The size of result is always not more than the size of source. + /// Because entities decodes to the shorter byte sequence. + /// Example: &#xx... &#xx... will decode to UTF-8 byte sequence not longer than 4 bytes. + res_data.resize(data.size()); + + size_t size = offsets.size(); + res_offsets.resize(size); + + size_t prev_offset = 0; + size_t res_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + const char * src_data = reinterpret_cast(&data[prev_offset]); + size_t src_size = offsets[i] - prev_offset; + size_t dst_size = execute(src_data, src_size, reinterpret_cast(res_data.data() + res_offset)); + + res_offset += dst_size; + res_offsets[i] = res_offset; + prev_offset = offsets[i]; + } + + res_data.resize(res_offset); + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function decodeHTMLComponent cannot work with FixedString argument"); + } + + private: + static const int max_legal_unicode_value = 0x10FFFF; + static const int max_decimal_length_of_unicode_point = 7; /// 1114111 + + + static size_t execute(const char * src, size_t src_size, char * dst) + { + const char * src_pos = src; + const char * src_end = src + src_size; + char * dst_pos = dst; + // perfect hashmap to lookup html character references + HTMLCharacterHash hash; + // to hold char seq for lookup, reuse it + std::vector seq; + while (true) + { + const char * entity_pos = find_first_symbols<'&'>(src_pos, src_end); + + /// Copy text between entities. + size_t bytes_to_copy = entity_pos - src_pos; + memcpySmallAllowReadWriteOverflow15(dst_pos, src_pos, bytes_to_copy); + dst_pos += bytes_to_copy; + src_pos = entity_pos; + + ++entity_pos; + + const char * entity_end = find_first_symbols<';'>(entity_pos, src_end); + + if (entity_end == src_end) + break; + + bool parsed = false; + + /// covers &#NNNN; or &#xNNNN hexadecimal values; + uint32_t code_point = 0; + if (isValidNumericEntity(entity_pos, entity_end, code_point)) + { + codePointToUTF8(code_point, dst_pos); + parsed = true; + } + else /// covers html encoded character sequences + { + // seq_length should also include `;` at the end + size_t seq_length = (entity_end - entity_pos) + 1; + seq.assign(entity_pos, entity_pos + seq_length); + // null terminate the sequence + seq.push_back('\0'); + // lookup the html sequence in the perfect hashmap. + const auto * res = hash.Lookup(seq.data(), strlen(seq.data())); + // reset so that it's reused in the next iteration + seq.clear(); + if (res) + { + const auto * glyph = res->glyph; + for (size_t i = 0; i < strlen(glyph); ++i) + { + *dst_pos = glyph[i]; + ++dst_pos; + } + parsed = true; + } + else + parsed = false; + } + + if (parsed) + { + /// Skip the parsed entity. 
+ src_pos = entity_end + 1; + } + else + { + /// Copy one byte as is and skip it. + *dst_pos = *src_pos; + ++dst_pos; + ++src_pos; + } + } + + /// Copy the rest of the string. + if (src_pos < src_end) + { + size_t bytes_to_copy = src_end - src_pos; + memcpySmallAllowReadWriteOverflow15(dst_pos, src_pos, bytes_to_copy); + dst_pos += bytes_to_copy; + } + + return dst_pos - dst; + } + + static size_t codePointToUTF8(uint32_t code_point, char *& dst_pos) + { + if (code_point < (1 << 7)) + { + dst_pos[0] = (code_point & 0x7F); + ++dst_pos; + return 1; + } + else if (code_point < (1 << 11)) + { + dst_pos[0] = ((code_point >> 6) & 0x1F) + 0xC0; + dst_pos[1] = (code_point & 0x3F) + 0x80; + dst_pos += 2; + return 2; + } + else if (code_point < (1 << 16)) + { + dst_pos[0] = ((code_point >> 12) & 0x0F) + 0xE0; + dst_pos[1] = ((code_point >> 6) & 0x3F) + 0x80; + dst_pos[2] = (code_point & 0x3F) + 0x80; + dst_pos += 3; + return 3; + } + else + { + dst_pos[0] = ((code_point >> 18) & 0x07) + 0xF0; + dst_pos[1] = ((code_point >> 12) & 0x3F) + 0x80; + dst_pos[2] = ((code_point >> 6) & 0x3F) + 0x80; + dst_pos[3] = (code_point & 0x3F) + 0x80; + dst_pos += 4; + return 4; + } + } + + [[maybe_unused]] static bool isValidNumericEntity(const char * src, const char * end, uint32_t & code_point) + { + if (src + strlen("#") >= end) + return false; + if (src[0] != '#' || (end - src > 1 + max_decimal_length_of_unicode_point)) + return false; + + if (src + 2 < end && (src[1] == 'x' || src[1] == 'X')) + { + src += 2; + for (; src < end; ++src) + { + if (!isHexDigit(*src)) + return false; + code_point *= 16; + code_point += unhex(*src); + } + } + else + { + src += 1; + for (; src < end; ++src) + { + if (!isNumericASCII(*src)) + return false; + code_point *= 10; + code_point += *src - '0'; + } + } + + return code_point <= max_legal_unicode_value; + } + }; + + using FunctionDecodeHTMLComponent = FunctionStringToString; + +} + +REGISTER_FUNCTION(DecodeHTMLComponent) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/endsWithUTF8.cpp b/src/Functions/endsWithUTF8.cpp new file mode 100644 index 00000000000..1b042452298 --- /dev/null +++ b/src/Functions/endsWithUTF8.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + + +namespace DB +{ + +using FunctionEndsWithUTF8 = FunctionStartsEndsWith; + +REGISTER_FUNCTION(EndsWithUTF8) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns whether string `str` ends with `suffix`. The difference between `endsWithUTF8` and `endsWith` is that `endsWithUTF8` matches `str` and `suffix` by UTF-8 characters.
+ )", + .examples{{"endsWithUTF8", "select endsWithUTF8('富强民主文明和谐', '富强');", ""}}, + .categories{"String"}}); +} + +} diff --git a/src/Functions/evalMLMethod.cpp b/src/Functions/evalMLMethod.cpp index 346c8249905..4d5657f0aab 100644 --- a/src/Functions/evalMLMethod.cpp +++ b/src/Functions/evalMLMethod.cpp @@ -5,7 +5,6 @@ #include #include -#include #include diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index 3a7987be93e..f9637e59461 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -14,8 +14,6 @@ #include #include -#include - namespace DB { diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 50772866648..ac3013424fb 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -746,7 +746,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -855,17 +855,25 @@ public: template ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const { - auto * times = checkAndGetColumn(arguments[0].column.get()); + auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst(); + auto * times = checkAndGetColumn(non_const_datetime.get()); if (!times) return nullptr; - const ColumnConst * format_column = checkAndGetColumnConst(arguments[1].column.get()); - if (!format_column) + String format; + if (const auto * format_column = checkAndGetColumnConst(arguments[1].column.get())) + format = format_column->getValue(); + else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second ('format') argument of function {}. 
Must be constant string.", arguments[1].column->getName(), getName()); - String format = format_column->getValue(); + const ColumnConst * const_time_zone_column = nullptr; + const DateLUTImpl * time_zone = nullptr; + if (arguments.size() == 2) + time_zone = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); + else if (arguments.size() > 2) + const_time_zone_column = checkAndGetColumnConst(arguments[2].column.get()); UInt32 scale [[maybe_unused]] = 0; if constexpr (std::is_same_v) @@ -893,15 +901,19 @@ public: String out_template; size_t out_template_size = parseFormat(format, instructions, scale, mysql_with_only_fixed_length_formatters, out_template); - const DateLUTImpl * time_zone_tmp = nullptr; if (castType(arguments[0].type.get(), [&]([[maybe_unused]] const auto & type) { return true; })) - time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); + { + if (const_time_zone_column) + time_zone = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); + } else if (std::is_same_v || std::is_same_v) - time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); + { + if (const_time_zone_column) + time_zone = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); + } else - time_zone_tmp = &DateLUT::instance(); + time_zone = &DateLUT::instance(); - const DateLUTImpl & time_zone = *time_zone_tmp; const auto & vec = times->getData(); auto col_res = ColumnString::create(); @@ -941,6 +953,13 @@ public: auto * pos = begin; for (size_t i = 0; i < vec.size(); ++i) { + if (!const_time_zone_column && arguments.size() > 2) + { + if (!arguments[2].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[2].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } if constexpr (std::is_same_v) { auto c = DecimalUtils::split(vec[i], scale); @@ -954,12 +973,14 @@ public: } for (auto & instruction : instructions) - instruction.perform(pos, static_cast(c.whole), c.fractional, scale, time_zone); + { + instruction.perform(pos, static_cast(c.whole), c.fractional, scale, *time_zone); + } } else { for (auto & instruction : instructions) - instruction.perform(pos, static_cast(vec[i]), 0, 0, time_zone); + instruction.perform(pos, static_cast(vec[i]), 0, 0, *time_zone); } *pos++ = '\0'; diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp index 8e76bb27ff1..695d1b7d63c 100644 --- a/src/Functions/fromModifiedJulianDay.cpp +++ b/src/Functions/fromModifiedJulianDay.cpp @@ -13,12 +13,12 @@ #include #include + namespace DB { namespace ErrorCodes { - extern const int CANNOT_FORMAT_DATETIME; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -56,25 +56,14 @@ namespace DB { if constexpr (nullOnErrors) { - try - { - const GregorianDate<> gd(vec_from[i]); - gd.write(write_buffer); - (*vec_null_map_to)[i] = false; - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::CANNOT_FORMAT_DATETIME) - (*vec_null_map_to)[i] = true; - else - throw; - } + GregorianDate gd; + (*vec_null_map_to)[i] = !(gd.tryInit(vec_from[i]) && gd.tryWrite(write_buffer)); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); } else { - const GregorianDate<> gd(vec_from[i]); + GregorianDate gd(vec_from[i]); gd.write(write_buffer); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index 1e89d9b5167..e70c2e17595 100644 --- 
a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -60,9 +60,8 @@ public: { /// https://tools.ietf.org/html/rfc4122#section-4.4 - UInt128 & impl = uuid.toUnderType(); - impl.items[0] = (impl.items[0] & 0xffffffffffff0fffull) | 0x0000000000004000ull; - impl.items[1] = (impl.items[1] & 0x3fffffffffffffffull) | 0x8000000000000000ull; + UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0xffffffffffff0fffull) | 0x0000000000004000ull; + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull; } return col_res; diff --git a/src/Functions/getTypeSerializationStreams.cpp b/src/Functions/getTypeSerializationStreams.cpp index 2b13f0f140d..da9fce70ee9 100644 --- a/src/Functions/getTypeSerializationStreams.cpp +++ b/src/Functions/getTypeSerializationStreams.cpp @@ -65,15 +65,7 @@ private: if (!arg_string) return argument.type; - try - { - DataTypePtr type = DataTypeFactory::instance().get(arg_string->getDataAt(0).toString()); - return type; - } - catch (const DB::Exception &) - { - return argument.type; - } + return DataTypeFactory::instance().get(arg_string->getDataAt(0).toString()); } }; diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 9045ba677f2..4c55c5cb119 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -3,12 +3,10 @@ #include #include #include -#include #include #include #include #include -#include #include @@ -70,12 +68,6 @@ public: return 2; } - /// Do not use default implementation for LowCardinality. - /// For now, Set may be const or non const column, depending on how it was created. - /// But we will return UInt8 for any case. - /// TODO: we could use special implementation later. - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); @@ -143,8 +135,6 @@ public: else columns_of_key_columns.emplace_back(left_arg); - /// Replace single LowCardinality column to it's dictionary if possible. - ColumnPtr lc_indexes = nullptr; bool is_const = false; if (columns_of_key_columns.size() == 1) { @@ -155,20 +145,10 @@ public: col = &const_col->getDataColumn(); is_const = true; } - - if (const auto * lc = typeid_cast(col)) - { - lc_indexes = lc->getIndexesPtr(); - arg.column = lc->getDictionary().getNestedColumn(); - arg.type = removeLowCardinality(arg.type); - } } auto res = set->execute(columns_of_key_columns, negative); - if (lc_indexes) - res = res->index(*lc_indexes, 0); - if (is_const) res = ColumnUInt8::create(input_rows_count, res->getUInt(0)); diff --git a/src/Functions/isNotDistinctFrom.cpp b/src/Functions/isNotDistinctFrom.cpp new file mode 100644 index 00000000000..0e91051b027 --- /dev/null +++ b/src/Functions/isNotDistinctFrom.cpp @@ -0,0 +1,27 @@ +#include + + +namespace DB +{ + +REGISTER_FUNCTION(IsNotDistinctFrom) +{ + factory.registerFunction( + FunctionDocumentation{ + .description = R"( +Performs a null-safe comparison between two values. This function will consider +two `NULL` values as identical and will return `true`, which is distinct from the usual +equals behavior where comparing two `NULL` values would return `NULL`. + +Currently, this function can only be used in the `JOIN ON` section of a query. 
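A quick usage sketch (not part of the patch; it restates the example registered below): with a plain `=` in `JOIN ON`, rows whose keys are both NULL are never matched, while `isNotDistinctFrom` treats the two NULLs as equal.

```
SELECT *
FROM (SELECT NULL AS a) AS t1
JOIN (SELECT NULL AS b) AS t2
    ON isNotDistinctFrom(t1.a, t2.b);
-- returns the row (NULL, NULL); the same join written with `ON t1.a = t2.b` matches nothing
```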
+[example:join_on_is_not_distinct_from] +)", + .examples{ + {"join_on_is_not_distinct_from", "SELECT * FROM (SELECT NULL AS a) AS t1 JOIN (SELECT NULL AS b) AS t2 ON isNotDistinctFrom(t1.a, t2.b)", "NULL\tNULL"}, + }, + .categories = {"Comparison", "Join Operators"}, + }); + +} + +} diff --git a/src/Functions/isNotDistinctFrom.h b/src/Functions/isNotDistinctFrom.h new file mode 100644 index 00000000000..290b96b4353 --- /dev/null +++ b/src/Functions/isNotDistinctFrom.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/** + * Performs null-safe comparison. + * equals(NULL, NULL) is NULL, while isNotDistinctFrom(NULL, NULL) is true. + * Currently, it can be used only in the JOIN ON section. + * This wrapper is needed to register function to make possible query analysis, syntax completion and so on. + */ +class FunctionIsNotDistinctFrom : public IFunction +{ +public: + static constexpr auto name = "isNotDistinctFrom"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return false; } + + size_t getNumberOfArguments() const override { return 2; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr &, size_t /* rows_count */) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} can be used only in the JOIN ON section", getName()); + } +}; + +} diff --git a/src/Functions/isZeroOrNull.cpp b/src/Functions/isZeroOrNull.cpp index bc0ac299a23..119fb2f67fd 100644 --- a/src/Functions/isZeroOrNull.cpp +++ b/src/Functions/isZeroOrNull.cpp @@ -44,14 +44,18 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & types) const override { - if (!isNumber(removeNullable(types.at(0)))) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The argument of function {} must have simple numeric type, possibly Nullable", name); + if (!isNumber(removeNullable(types.at(0))) && !isNothing(removeNullable(types.at(0)))) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The argument of function {} must have simple numeric type, possibly Nullable or Null", name); return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (isNothing(removeNullable(arguments[0].type))) + return DataTypeUInt8{}.createColumnConst(input_rows_count, 1); + const ColumnPtr & input_column = arguments[0].column; ColumnPtr res; @@ -72,7 +76,10 @@ public: return true; })) { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must have simple numeric type, possibly Nullable", name); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The argument of function {} must have simple numeric type, possibly Nullable or Null", + name); } } else @@ -89,7 +96,10 @@ public: return true; })) { - throw 
Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must have simple numeric type, possibly Nullable", name); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The argument of function {} must have simple numeric type, possibly Nullable or Null", + name); } } diff --git a/src/Functions/keyvaluepair/impl/StateHandler.h b/src/Functions/keyvaluepair/impl/StateHandler.h index 27c1a0b44be..178974e9d36 100644 --- a/src/Functions/keyvaluepair/impl/StateHandler.h +++ b/src/Functions/keyvaluepair/impl/StateHandler.h @@ -2,7 +2,6 @@ #include -#include namespace DB { diff --git a/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp index 507d9c0e5c7..55a08023cbd 100644 --- a/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp +++ b/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp @@ -9,11 +9,11 @@ #include #include -#include #include #include #include + namespace { using namespace DB; @@ -174,5 +174,3 @@ INSTANTIATE_TEST_SUITE_P(InvalidEscapeSeqInValue, extractKVPairKeyValuePairExtra } ) ); - - diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index c3fbc08c4a9..fdab85c4640 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -398,7 +398,7 @@ namespace static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_) { if (!isDayOfYearValid(year_, day_of_year_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_); Int32 res = daysSinceEpochFromDate(year_, 1, 1); res += day_of_year_ - 1; @@ -408,7 +408,7 @@ namespace static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_) { if (!isDateValid(year_, month_, day_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_); Int32 res = cumulativeYearDays[year_ - 1970]; res += isLeapYear(year_) ? 
cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1]; @@ -485,15 +485,16 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{ + FunctionArgumentDescriptors mandatory_args{ {"time", &isString, nullptr, "String"}, - {"format", &isString, nullptr, "String"}, + {"format", &isString, nullptr, "String"} }; - if (arguments.size() == 3) - args.emplace_back(FunctionArgumentDescriptor{"timezone", &isString, nullptr, "String"}); + FunctionArgumentDescriptors optional_args{ + {"timezone", &isString, &isColumnConst, "const String"} + }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr date_type = std::make_shared(time_zone_name); diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index 36c944d16fd..9e86a70f877 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -1,26 +1,27 @@ #include -#include #include +#include #include -#include -#include -#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include -#include +#include #include +#include #include @@ -261,8 +262,10 @@ public: memcpy(static_cast(&to[i]), static_cast(&from[i]), copy_size); else { - size_t offset_to = sizeof(To) > sizeof(From) ? sizeof(To) - sizeof(From) : 0; - memcpy(reinterpret_cast(&to[i]) + offset_to, static_cast(&from[i]), copy_size); + // Handle the cases of both 128-bit representation to 256-bit and 128-bit to 64-bit or lower. + const size_t offset_from = sizeof(From) > sizeof(To) ? sizeof(From) - sizeof(To) : 0; + const size_t offset_to = sizeof(To) > sizeof(From) ? sizeof(To) - sizeof(From) : 0; + memcpy(reinterpret_cast(&to[i]) + offset_to, reinterpret_cast(&from[i]) + offset_from, copy_size); } } @@ -315,7 +318,11 @@ private: { std::string_view data = src.getDataAt(i).toView(); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ memcpy(&data_to[offset], data.data(), std::min(n, data.size())); +#else + reverseMemcpy(&data_to[offset], data.data(), std::min(n, data.size())); +#endif offset += n; } } @@ -326,7 +333,11 @@ private: ColumnFixedString::Chars & data_to = dst.getChars(); data_to.resize(n * rows); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ memcpy(data_to.data(), src.getRawData().data(), data_to.size()); +#else + reverseMemcpy(data_to.data(), src.getRawData().data(), data_to.size()); +#endif } static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst) diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index d1960860308..fba8293e5ff 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -9,7 +9,8 @@ #include #include #include -#include +#include + namespace ProfileEvents { @@ -40,11 +41,17 @@ enum class FunctionSleepVariant template class FunctionSleep : public IFunction { +private: + UInt64 max_microseconds; public: static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? 
"sleep" : "sleepEachRow"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr context) + { + return std::make_shared>(context->getSettingsRef().function_sleep_max_microseconds_per_block); + } + + FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_) { - return std::make_shared>(); } /// Get the name of the function. @@ -105,13 +112,19 @@ public: if (size > 0) { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. - if (seconds > 3.0) /// The choice is arbitrary - throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); + if (max_microseconds && seconds * 1e6 > max_microseconds) + throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds); if (!dry_run) { UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); UInt64 microseconds = static_cast(seconds * count * 1e6); + + if (max_microseconds && microseconds > max_microseconds) + throw Exception(ErrorCodes::TOO_SLOW, + "The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})", + max_microseconds, microseconds, size); + sleepForMicroseconds(microseconds); ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count); ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds); diff --git a/src/Functions/startsWithUTF8.cpp b/src/Functions/startsWithUTF8.cpp new file mode 100644 index 00000000000..3f46916d760 --- /dev/null +++ b/src/Functions/startsWithUTF8.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + + +namespace DB +{ + +using FunctionStartsWithUTF8 = FunctionStartsEndsWith; + +REGISTER_FUNCTION(StartsWithUTF8) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns whether string `str` starts with `prefix`, the difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` match `str` and `suffix` by UTF-8 characters. 
+ )", + .examples{{"startsWithUTF8", "select startsWithUTF8('富强民主文明和谐', '富强');", ""}}, + .categories{"String"}}); +} + +} diff --git a/src/Functions/structureToFormatSchema.cpp b/src/Functions/structureToFormatSchema.cpp new file mode 100644 index 00000000000..406da372c04 --- /dev/null +++ b/src/Functions/structureToFormatSchema.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +class FunctionStructureToFormatSchema : public IFunction +{ +public: + + static constexpr auto name = Impl::name; + explicit FunctionStructureToFormatSchema(ContextPtr context_) : context(std::move(context_)) + { + } + + static FunctionPtr create(ContextPtr ctx) + { + return std::make_shared(std::move(ctx)); + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; } + bool useDefaultImplementationForConstants() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty() || arguments.size() > 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, expected 1 or 2", + getName(), arguments.size()); + + if (!isString(arguments[0])) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of the first argument of function {}, expected constant string", + arguments[0]->getName(), + getName()); + } + + if (arguments.size() > 1 && !isString(arguments[1])) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of the second argument of function {}, expected constant string", + arguments[1]->getName(), + getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (arguments.empty() || arguments.size() > 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, expected 1 or 2", + getName(), arguments.size()); + + String structure = arguments[0].column->getDataAt(0).toString(); + String message_name = arguments.size() == 2 ? 
arguments[1].column->getDataAt(0).toString() : "Message"; + auto columns_list = parseColumnsListFromString(structure, context); + auto col_res = ColumnString::create(); + auto & data = assert_cast(*col_res).getChars(); + WriteBufferFromVector buf(data); + Impl::writeSchema(buf, message_name, columns_list.getAll()); + buf.finalize(); + auto & offsets = assert_cast(*col_res).getOffsets(); + offsets.push_back(data.size()); + return ColumnConst::create(std::move(col_res), input_rows_count); + } + +private: + ContextPtr context; +}; + + +REGISTER_FUNCTION(StructureToCapnProtoSchema) +{ + factory.registerFunction>(FunctionDocumentation + { + .description=R"( +Function that converts ClickHouse table structure to CapnProto format schema +)", + .examples{ + {"random", "SELECT structureToCapnProtoSchema('s String, x UInt32', 'MessageName') format TSVRaw", "struct MessageName\n" +"{\n" +" s @0 : Data;\n" +" x @1 : UInt32;\n" +"}"}, + }, + .categories{"Other"} + }, + FunctionFactory::CaseSensitive); +} + + +REGISTER_FUNCTION(StructureToProtobufSchema) +{ + factory.registerFunction>(FunctionDocumentation + { + .description=R"( +Function that converts ClickHouse table structure to Protobuf format schema +)", + .examples{ + {"random", "SELECT structureToCapnProtoSchema('s String, x UInt32', 'MessageName') format TSVRaw", "syntax = \"proto3\";\n" +"\n" +"message MessageName\n" +"{\n" +" bytes s = 1;\n" +" uint32 x = 2;\n" +"}"}, + }, + .categories{"Other"} + }, + FunctionFactory::CaseSensitive); +} + +} diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp new file mode 100644 index 00000000000..5f3f054b624 --- /dev/null +++ b/src/Functions/substringIndex.cpp @@ -0,0 +1,302 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + + template + class FunctionSubstringIndex : public IFunction + { + public: + static constexpr auto name = is_utf8 ? 
"substringIndexUTF8" : "substringIndex"; + + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, String expected", + arguments[0]->getName(), + getName()); + + if (!isString(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, String expected", + arguments[1]->getName(), + getName()); + + if (!isNativeInteger(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}, Integer expected", + arguments[2]->getName(), + getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + ColumnPtr column_string = arguments[0].column; + ColumnPtr column_delim = arguments[1].column; + ColumnPtr column_count = arguments[2].column; + + const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get()); + if (!column_delim_const) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName()); + + String delim = column_delim_const->getValue(); + if constexpr (!is_utf8) + { + if (delim.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName()); + } + else + { + if (UTF8::countCodePoints(reinterpret_cast(delim.data()), delim.size()) != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName()); + } + + auto column_res = ColumnString::create(); + ColumnString::Chars & vec_res = column_res->getChars(); + ColumnString::Offsets & offsets_res = column_res->getOffsets(); + + const ColumnConst * column_string_const = checkAndGetColumnConst(column_string.get()); + if (column_string_const) + { + String str = column_string_const->getValue(); + constantVector(str, delim, column_count.get(), vec_res, offsets_res); + } + else + { + const auto * col_str = checkAndGetColumn(column_string.get()); + if (!col_str) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName()); + + bool is_count_const = isColumnConst(*column_count); + if (is_count_const) + { + Int64 count = column_count->getInt(0); + vectorConstant(col_str, delim, count, vec_res, offsets_res); + } + else + vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res); + } + return column_res; + } + + protected: + static void vectorVector( + const ColumnString * str_column, + const String & delim, + const IColumn * count_column, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = str_column->size(); + res_data.reserve(str_column->getChars().size() / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? 
nullptr : std::make_unique(delim.data(), delim.size()); + + for (size_t i = 0; i < rows; ++i) + { + StringRef str_ref = str_column->getDataAt(i); + Int64 count = count_column->getInt(i); + + StringRef res_ref; + if constexpr (!is_utf8) + res_ref = substringIndex(str_ref, delim[0], count); + else + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); + + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void vectorConstant( + const ColumnString * str_column, + const String & delim, + Int64 count, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = str_column->size(); + res_data.reserve(str_column->getChars().size() / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + for (size_t i = 0; i < rows; ++i) + { + StringRef str_ref = str_column->getDataAt(i); + + StringRef res_ref; + if constexpr (!is_utf8) + res_ref = substringIndex(str_ref, delim[0], count); + else + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); + + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void constantVector( + const String & str, + const String & delim, + const IColumn * count_column, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = count_column->size(); + res_data.reserve(str.size() * rows / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + StringRef str_ref{str.data(), str.size()}; + for (size_t i = 0; i < rows; ++i) + { + Int64 count = count_column->getInt(i); + + StringRef res_ref; + if constexpr (!is_utf8) + res_ref = substringIndex(str_ref, delim[0], count); + else + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); + + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + { + size_t res_offset = res_data.size(); + res_data.resize(res_offset + res_ref.size + 1); + memcpy(&res_data[res_offset], res_ref.data, res_ref.size); + res_offset += res_ref.size; + res_data[res_offset] = 0; + ++res_offset; + + res_offsets.emplace_back(res_offset); + } + + static StringRef substringIndexUTF8( + const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 count) + { + if (count == 0) + return {str_ref.data, 0}; + + const auto * begin = reinterpret_cast(str_ref.data); + const auto * end = reinterpret_cast(str_ref.data + str_ref.size); + const auto * pos = begin; + if (count > 0) + { + Int64 i = 0; + while (i < count) + { + pos = searcher->search(pos, end - pos); + + if (pos != end) + { + pos += delim.size(); + ++i; + } + else + return str_ref; + } + return {begin, static_cast(pos - begin - delim.size())}; + } + else + { + Int64 total = 0; + while (pos < end && end != (pos = searcher->search(pos, end - pos))) + { + pos += delim.size(); + ++total; + } + + if (total + count < 0) + return str_ref; + + pos = begin; + Int64 i = 0; + Int64 count_from_left = total + 1 + count; + while (i < count_from_left && pos < end && end != (pos = searcher->search(pos, end - pos))) + { + pos += delim.size(); + ++i; + } + return {pos, static_cast(end - pos)}; + } + } + + static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 count) + { + if (count == 0) + return 
{str_ref.data, 0}; + + const auto * pos = count > 0 ? str_ref.data : str_ref.data + str_ref.size - 1; + const auto * end = count > 0 ? str_ref.data + str_ref.size : str_ref.data - 1; + int d = count > 0 ? 1 : -1; + + for (; count; pos += d) + { + if (pos == end) + return str_ref; + if (*pos == delim) + count -= d; + } + pos -= d; + return { + d > 0 ? str_ref.data : pos + 1, static_cast(d > 0 ? pos - str_ref.data : str_ref.data + str_ref.size - pos - 1)}; + } + }; +} + + +REGISTER_FUNCTION(SubstringIndex) +{ + factory.registerFunction>(); /// substringIndex + factory.registerFunction>(); /// substringIndexUTF8 + + factory.registerAlias("SUBSTRING_INDEX", "substringIndex", FunctionFactory::CaseInsensitive); +} + + +} diff --git a/src/Functions/toDayOfMonth.cpp b/src/Functions/toDayOfMonth.cpp index d7689ef00f2..c20b0b75797 100644 --- a/src/Functions/toDayOfMonth.cpp +++ b/src/Functions/toDayOfMonth.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToDayOfMonth) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("DAY", "toDayOfMonth", FunctionFactory::CaseInsensitive); factory.registerAlias("DAYOFMONTH", "toDayOfMonth", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index 06343714b9d..dc508d70814 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -12,7 +12,7 @@ REGISTER_FUNCTION(ToDayOfWeek) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("DAYOFWEEK", "toDayOfWeek", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toDayOfYear.cpp b/src/Functions/toDayOfYear.cpp index 8b03f1a4211..0cbafd6275a 100644 --- a/src/Functions/toDayOfYear.cpp +++ b/src/Functions/toDayOfYear.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToDayOfYear) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. 
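A usage sketch for the `substringIndex` function added above (illustrative only; the semantics follow MySQL's SUBSTRING_INDEX, hence the alias): a positive count returns everything before the count-th delimiter from the left, a negative count everything after the count-th delimiter from the right.

```
SELECT substringIndex('www.clickhouse.com', '.', 2);   -- 'www.clickhouse'
SELECT substringIndex('www.clickhouse.com', '.', -2);  -- 'clickhouse.com'
SELECT SUBSTRING_INDEX('www.clickhouse.com', '.', 1);  -- 'www', via the case-insensitive alias
```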
factory.registerAlias("DAYOFYEAR", "toDayOfYear", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toDaysSinceYearZero.cpp b/src/Functions/toDaysSinceYearZero.cpp new file mode 100644 index 00000000000..e569c5cb1f3 --- /dev/null +++ b/src/Functions/toDaysSinceYearZero.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** Returns number of days passed since 0000-01-01 */ +class FunctionToDaysSinceYearZero : public IFunction +{ + using ResultType = DataTypeUInt32; +public: + static constexpr auto name = "toDaysSinceYearZero"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + explicit FunctionToDaysSinceYearZero(ContextPtr /*context*/) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors mandatory_args{ + {"date", &isDateOrDate32, nullptr, "Date or Date32"} + }; + + validateFunctionArgumentTypes(*this, arguments, mandatory_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const IDataType * from_type = arguments[0].type.get(); + WhichDataType which(from_type); + + if (which.isDate()) + return DateTimeTransformImpl::execute(arguments, result_type, input_rows_count); + else if (which.isDate32()) + return DateTimeTransformImpl::execute(arguments, result_type, input_rows_count); + + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[0].type->getName(), this->getName()); + } +}; + +} + +REGISTER_FUNCTION(ToDaysSinceYearZero) +{ + factory.registerFunction( + FunctionDocumentation{ + .description=R"( +Returns for a given date, the number of days passed since 1 January 0000 in the proleptic Gregorian calendar defined by ISO 8601. +The calculation is the same as in MySQL's TO_DAYS() function. +)", + .examples{ + {"typical", "SELECT toDaysSinceYearZero(toDate('2023-09-08'))", "713569"}}, + .categories{"Dates and Times"} + }); + + /// MySQL compatibility alias. + factory.registerAlias("TO_DAYS", FunctionToDaysSinceYearZero::name, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/toHour.cpp b/src/Functions/toHour.cpp index a6a57946e33..fc9ec657adf 100644 --- a/src/Functions/toHour.cpp +++ b/src/Functions/toHour.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToHour) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("HOUR", "toHour", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toMinute.cpp b/src/Functions/toMinute.cpp index 25939870554..162ecb282df 100644 --- a/src/Functions/toMinute.cpp +++ b/src/Functions/toMinute.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToMinute) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. 
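A usage sketch for `toDaysSinceYearZero` introduced above (the expected value comes from the example registered with the function), including its MySQL-compatibility alias:

```
SELECT toDaysSinceYearZero(toDate('2023-09-08'));  -- 713569
SELECT TO_DAYS(toDate('2023-09-08'));              -- same value via the case-insensitive alias
```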
factory.registerAlias("MINUTE", "toMinute", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp index 0d854bcc110..907c7570ce2 100644 --- a/src/Functions/toModifiedJulianDay.cpp +++ b/src/Functions/toModifiedJulianDay.cpp @@ -17,8 +17,6 @@ namespace DB { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_PARSE_DATE; } template @@ -78,27 +76,18 @@ namespace DB if constexpr (nullOnErrors) { - try - { - const GregorianDate<> date(read_buffer); - vec_to[i] = date.toModifiedJulianDay(); - vec_null_map_to[i] = false; - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE) - { - vec_to[i] = static_cast(0); - vec_null_map_to[i] = true; - } - else - throw; - } + GregorianDate date; + + int64_t res = 0; + bool success = date.tryInit(read_buffer) && date.tryToModifiedJulianDay(res); + + vec_to[i] = static_cast(res); + vec_null_map_to[i] = !success; } else { - const GregorianDate<> date(read_buffer); - vec_to[i] = date.toModifiedJulianDay(); + const GregorianDate date(read_buffer); + vec_to[i] = static_cast(date.toModifiedJulianDay()); } } diff --git a/src/Functions/toMonth.cpp b/src/Functions/toMonth.cpp index 783a1341e23..422f21e7df8 100644 --- a/src/Functions/toMonth.cpp +++ b/src/Functions/toMonth.cpp @@ -12,7 +12,7 @@ using FunctionToMonth = FunctionDateOrDateTimeToSomething(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("MONTH", "toMonth", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toQuarter.cpp b/src/Functions/toQuarter.cpp index 2268b6402c6..3c301095ff2 100644 --- a/src/Functions/toQuarter.cpp +++ b/src/Functions/toQuarter.cpp @@ -12,7 +12,7 @@ using FunctionToQuarter = FunctionDateOrDateTimeToSomething(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("QUARTER", "toQuarter", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toSecond.cpp b/src/Functions/toSecond.cpp index 2fd64912c0f..372097fd488 100644 --- a/src/Functions/toSecond.cpp +++ b/src/Functions/toSecond.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToSecond) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("SECOND", "toSecond", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/toValidUTF8.cpp b/src/Functions/toValidUTF8.cpp index 528cef93dd3..41d29d9c494 100644 --- a/src/Functions/toValidUTF8.cpp +++ b/src/Functions/toValidUTF8.cpp @@ -7,6 +7,8 @@ #include +#include + #ifdef __SSE2__ # include #endif @@ -73,16 +75,13 @@ struct ToValidUTF8Impl /// Fast skip of ASCII for aarch64. static constexpr size_t SIMD_BYTES = 16; const char * simd_end = p + (end - p) / SIMD_BYTES * SIMD_BYTES; - /// Returns a 64 bit mask of nibbles (4 bits for each byte). - auto get_nibble_mask = [](uint8x16_t input) -> uint64_t - { return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); }; /// Other options include /// vmaxvq_u8(input) < 0b10000000; /// Used by SIMDJSON, has latency 3 for M1, 6 for everything else /// SIMDJSON uses it for 64 byte masks, so it's a little different. /// vmaxvq_u32(vandq_u32(input, vdupq_n_u32(0x80808080))) // u32 version has latency 3 /// shrn version has universally <=3 cycles, on servers 2 cycles. 
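The GregorianDate changes above swap exception-based error handling for try-style methods in the *OrNull variants; as a rough sketch of the user-visible behaviour (the concrete values are hand-computed assumptions, not taken from the patch):

```
SELECT toModifiedJulianDayOrNull('2023-09-08');  -- 60195
SELECT toModifiedJulianDayOrNull('not a date');  -- NULL instead of an exception
SELECT fromModifiedJulianDayOrNull(60195);       -- '2023-09-08'
```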
- while (p < simd_end && get_nibble_mask(vcgeq_u8(vld1q_u8(reinterpret_cast(p)), vdupq_n_u8(0x80))) == 0) + while (p < simd_end && getNibbleMask(vcgeq_u8(vld1q_u8(reinterpret_cast(p)), vdupq_n_u8(0x80))) == 0) p += SIMD_BYTES; if (!(p < end)) diff --git a/src/Functions/toYear.cpp b/src/Functions/toYear.cpp index 9cf2a260921..75479adb82c 100644 --- a/src/Functions/toYear.cpp +++ b/src/Functions/toYear.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToYear) { factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerAlias("YEAR", "toYear", FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 1fc0e3adf96..b7582b37017 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -156,14 +156,27 @@ namespace { initialize(arguments, result_type); - const auto * in = arguments.front().column.get(); + const auto * in = arguments[0].column.get(); if (isColumnConst(*in)) return executeConst(arguments, result_type, input_rows_count); ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) + { default_non_const = castColumn(arguments[3], result_type); + if (in->size() > default_non_const->size()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Fourth argument of function {} must be a constant or a column at least as big as the second and third arguments", + getName()); + } + } + + ColumnPtr in_casted = arguments[0].column; + if (arguments.size() == 3) + in_casted = castColumn(arguments[0], result_type); auto column_result = result_type->createColumn(); if (cache.is_empty) @@ -174,30 +187,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const)) + if (!executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if (cache.table_string_to_idx) { - if (!executeString(in, *column_result, default_non_const)) - executeContiguous(in, *column_result, default_non_const); + if (!executeString(in, *column_result, 
default_non_const, *in_casted)) + executeContiguous(in, *column_result, default_non_const, *in_casted); } else if (cache.table_anything_to_idx) { - executeAnything(in, *column_result, default_non_const); + executeAnything(in, *column_result, default_non_const, *in_casted); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -218,7 +231,7 @@ namespace return impl->execute(args, result_type, input_rows_count); } - void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const + void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const size_t size = in->size(); const auto & table = *cache.table_anything_to_idx; @@ -236,11 +249,11 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } - void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const + void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const size_t size = in->size(); const auto & table = *cache.table_string_to_idx; @@ -255,12 +268,12 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } template - bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -297,7 +310,7 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } return true; @@ -451,7 +464,7 @@ namespace } } - bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -486,9 +499,9 @@ namespace else if (cache.default_column) column_result.insertFrom(*cache.default_column, 0); else if (default_non_const) - column_result.insertFrom(*default_non_const, 0); + column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } return true; @@ -654,13 +667,13 @@ namespace std::unique_ptr table_string_to_idx; std::unique_ptr table_anything_to_idx; - bool is_empty = false; - ColumnPtr from_column; ColumnPtr to_column; ColumnPtr default_column; - std::atomic initialized{false}; + bool is_empty = false; + bool initialized = false; + std::mutex mutex; }; @@ -693,13 +706,12 @@ namespace /// Can be called from different threads. It works only on the first call. 
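For context, a sketch of typical `transform` usage (generic, not new in this patch); the change above makes the three-argument form cast the input column to the result type, so unmatched values come back in the common result type, and checks that a non-constant default column is large enough.

```
-- Four-argument form: values missing from the 'from' array fall back to the default.
SELECT transform(number, [0, 1], ['zero', 'one'], 'other') FROM numbers(3);
-- 'zero', 'one', 'other'

-- Three-argument form: unmatched values are passed through, cast to the result type.
SELECT transform(number, [1], [100]) FROM numbers(3);
-- 0, 100, 2
```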
void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { + std::lock_guard lock(cache.mutex); if (cache.initialized) return; const DataTypePtr & from_type = arguments[0].type; - std::lock_guard lock(cache.mutex); - if (from_type->onlyNull()) { cache.is_empty = true; @@ -761,9 +773,8 @@ namespace } /// Note: Doesn't check the duplicates in the `from` array. - - WhichDataType which(from_type); - if (isNativeNumber(which) || which.isDecimal32() || which.isDecimal64()) + /// Field may be of Float type, but for the purpose of bitwise equality we can treat them as UInt64 + if (WhichDataType which(from_type); isNativeNumber(which) || which.isDecimal32() || which.isDecimal64()) { cache.table_num_to_idx = std::make_unique(); auto & table = *cache.table_num_to_idx; @@ -771,10 +782,17 @@ namespace { if (applyVisitor(FieldVisitorAccurateEquals(), (*cache.from_column)[i], (*from_column_uncasted)[i])) { - /// Field may be of Float type, but for the purpose of bitwise equality we can treat them as UInt64 - StringRef ref = cache.from_column->getDataAt(i); UInt64 key = 0; - memcpy(&key, ref.data, ref.size); + auto * dst = reinterpret_cast(&key); + const auto ref = cache.from_column->getDataAt(i); + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native == std::endian::big) + dst += sizeof(key) - ref.size; +#pragma clang diagnostic pop + + memcpy(dst, ref.data, ref.size); table[key] = i; } } diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index 83779eee23c..836cb4de2f3 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -6,6 +6,8 @@ #include #include #include +#include + namespace DB { diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index f6c2831365f..c83195bc976 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -1,87 +1,7 @@ -#include -#include -#include -#include -#include - +#include namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -namespace -{ - -/** tuple(x, y, ...) is a function that allows you to group several columns - * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. - */ - -class FunctionTuple : public IFunction -{ -public: - static constexpr auto name = "tuple"; - - static FunctionPtr create(ContextPtr) - { - return std::make_shared(); - } - - String getName() const override - { - return name; - } - - bool isVariadic() const override - { - return true; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool isInjective(const ColumnsWithTypeAndName &) const override - { - return true; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - bool useDefaultImplementationForNulls() const override { return false; } - /// tuple(..., Nothing, ...) -> Tuple(..., Nothing, ...) 
- bool useDefaultImplementationForNothing() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); - - return std::make_shared(arguments); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - size_t tuple_size = arguments.size(); - Columns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - { - /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ - tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); - } - return ColumnTuple::create(tuple_columns); - } -}; - -} REGISTER_FUNCTION(Tuple) { diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h new file mode 100644 index 00000000000..cc616f5df8a --- /dev/null +++ b/src/Functions/tuple.h @@ -0,0 +1,70 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** tuple(x, y, ...) is a function that allows you to group several columns + * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. + */ +class FunctionTuple : public IFunction +{ +public: + static constexpr auto name = "tuple"; + + /// maybe_unused: false-positive + [[ maybe_unused ]] static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForNulls() const override { return false; } + + /// tuple(..., Nothing, ...) -> Tuple(..., Nothing, ...) + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); + + return std::make_shared(arguments); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + size_t tuple_size = arguments.size(); + Columns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + { + /** If tuple is mixed of constant and not constant columns, + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. 
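The `tuple` implementation is being moved into a header so it can be reused elsewhere; for reference, a sketch of the SQL-level behaviour it implements (standard usage, nothing new in this patch):

```
SELECT tuple(1, 'a', NULL) AS t, tupleElement(t, 2) AS second, t.1 AS first;
-- ((1, 'a', NULL), 'a', 1)
```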
+ */ + tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); + } + return ColumnTuple::create(tuple_columns); + } +}; + +} diff --git a/src/Functions/tupleConcat.cpp b/src/Functions/tupleConcat.cpp new file mode 100644 index 00000000000..0556f4181e6 --- /dev/null +++ b/src/Functions/tupleConcat.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_COLUMN; +} + +/// tupleConcat(tup1, ...) - concatenate tuples. +class FunctionTupleConcat : public IFunction +{ +public: + static constexpr auto name = "tupleConcat"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at least one argument.", + getName()); + + DataTypes tuple_arg_types; + + for (const auto arg_idx : collections::range(0, arguments.size())) + { + const auto * arg = arguments[arg_idx].get(); + if (!isTuple(arg)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}", + arg->getName(), + arg_idx + 1, + getName()); + + const auto * type = checkAndGetDataType(arg); + for (const auto & elem : type->getElements()) + tuple_arg_types.push_back(elem); + } + + return std::make_shared(tuple_arg_types); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + const size_t num_arguments = arguments.size(); + Columns columns; + + for (size_t i = 0; i < num_arguments; i++) + { + const DataTypeTuple * arg_type = checkAndGetDataType(arguments[i].type.get()); + + if (!arg_type) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}", + arguments[i].type->getName(), + i + 1, + getName()); + + ColumnPtr arg_col = arguments[i].column->convertToFullColumnIfConst(); + const ColumnTuple * tuple_col = checkAndGetColumn(arg_col.get()); + + if (!tuple_col) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}", + arguments[i].column->getName(), + getName()); + + for (const auto & inner_col : tuple_col->getColumns()) + columns.push_back(inner_col); + } + + return ColumnTuple::create(columns); + } +}; + +REGISTER_FUNCTION(TupleConcat) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/tupleHammingDistance.cpp b/src/Functions/tupleHammingDistance.cpp index adc063bfa81..ffdf8c93f15 100644 --- a/src/Functions/tupleHammingDistance.cpp +++ b/src/Functions/tupleHammingDistance.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -86,7 +85,7 @@ public: auto plus_elem = plus->build({left_type, right_type}); res_type = plus_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; diff --git a/src/Functions/vectorFunctions.cpp 
b/src/Functions/vectorFunctions.cpp index db907af972d..35ba49e4545 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -23,6 +23,9 @@ struct PlusName { static constexpr auto name = "plus"; }; struct MinusName { static constexpr auto name = "minus"; }; struct MultiplyName { static constexpr auto name = "multiply"; }; struct DivideName { static constexpr auto name = "divide"; }; +struct ModuloName { static constexpr auto name = "modulo"; }; +struct IntDivName { static constexpr auto name = "intDiv"; }; +struct IntDivOrZeroName { static constexpr auto name = "intDivOrZero"; }; struct L1Label { static constexpr auto name = "1"; }; struct L2Label { static constexpr auto name = "2"; }; @@ -95,7 +98,7 @@ public: auto elem_func = func->build(ColumnsWithTypeAndName{left, right}); types[i] = elem_func->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -141,6 +144,12 @@ using FunctionTupleMultiply = FunctionTupleOperator; using FunctionTupleDivide = FunctionTupleOperator; +using FunctionTupleModulo = FunctionTupleOperator; + +using FunctionTupleIntDiv = FunctionTupleOperator; + +using FunctionTupleIntDivOrZero = FunctionTupleOperator; + class FunctionTupleNegate : public ITupleFunction { public: @@ -181,7 +190,7 @@ public: auto elem_negate = negate->build(ColumnsWithTypeAndName{cur}); types[i] = elem_negate->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -258,7 +267,7 @@ public: auto elem_func = func->build(ColumnsWithTypeAndName{cur, p_column}); types[i] = elem_func->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -297,6 +306,12 @@ using FunctionTupleMultiplyByNumber = FunctionTupleOperatorByNumber; +using FunctionTupleModuloByNumber = FunctionTupleOperatorByNumber; + +using FunctionTupleIntDivByNumber = FunctionTupleOperatorByNumber; + +using FunctionTupleIntDivOrZeroByNumber = FunctionTupleOperatorByNumber; + class FunctionDotProduct : public ITupleFunction { public: @@ -363,7 +378,7 @@ public: auto plus_elem = plus->build({left_type, right_type}); res_type = plus_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -467,7 +482,7 @@ public: auto plus_elem = plus->build({left, right}); res_type = plus_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -740,7 +755,7 @@ public: auto plus_elem = plus->build({left_type, right_type}); res_type = plus_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -842,7 +857,7 @@ public: auto plus_elem = plus->build({left_type, right_type}); res_type = plus_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -993,7 +1008,7 @@ public: auto max_elem = max->build({left_type, right_type}); res_type = max_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple 
element {}", getName(), i); throw; @@ -1103,7 +1118,7 @@ public: auto plus_elem = plus->build({left_type, right_type}); res_type = plus_elem->getResultType(); } - catch (DB::Exception & e) + catch (Exception & e) { e.addMessage("While executing function {} for tuple element {}", getName(), i); throw; @@ -1563,6 +1578,9 @@ REGISTER_FUNCTION(VectorFunctions) factory.registerAlias("vectorDifference", FunctionTupleMinus::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(FunctionDocumentation @@ -1626,6 +1644,9 @@ If the types of the first interval (or the interval in the tuple) and the second factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerAlias("scalarProduct", TupleOrArrayFunctionDotProduct::name, FunctionFactory::CaseInsensitive); diff --git a/src/IO/Archives/ArchiveUtils.h b/src/IO/Archives/ArchiveUtils.h new file mode 100644 index 00000000000..810b9d8d730 --- /dev/null +++ b/src/IO/Archives/ArchiveUtils.h @@ -0,0 +1,14 @@ +#pragma once + +#include "config.h" + +#if USE_LIBARCHIVE + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" + +#include +#include +#endif +#endif diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h index 37629cd7eed..84a1dc21f5b 100644 --- a/src/IO/Archives/IArchiveReader.h +++ b/src/IO/Archives/IArchiveReader.h @@ -40,18 +40,26 @@ public: virtual bool nextFile() = 0; }; + virtual const std::string & getPath() const = 0; + /// Starts enumerating files in the archive. virtual std::unique_ptr firstFile() = 0; + using NameFilter = std::function; + /// Starts reading a file from the archive. The function returns a read buffer, /// you can read that buffer to extract uncompressed data from the archive. /// Several read buffers can be used at the same time in parallel. - virtual std::unique_ptr readFile(const String & filename) = 0; + virtual std::unique_ptr readFile(const String & filename, bool throw_on_not_found) = 0; + virtual std::unique_ptr readFile(NameFilter filter, bool throw_on_not_found) = 0; /// It's possible to convert a file enumerator to a read buffer and vice versa. virtual std::unique_ptr readFile(std::unique_ptr enumerator) = 0; virtual std::unique_ptr nextFile(std::unique_ptr read_buffer) = 0; + virtual std::vector getAllFiles() = 0; + virtual std::vector getAllFiles(NameFilter filter) = 0; + /// Sets password used to decrypt files in the archive. 
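A usage sketch for the tuple helpers added in this patch, `tupleConcat` and the element-wise `tupleModulo`, `tupleIntDiv` and `tupleIntDivOrZero` (the result values are worked out by hand and should be treated as assumptions):

```
SELECT tupleConcat((1, 'a'), (2, 'b'));      -- (1, 'a', 2, 'b')
SELECT tupleModulo((15, 10, 5), (4, 3, 2));  -- (3, 1, 1)
SELECT tupleIntDiv((15, 10, 5), (4, 3, 2));  -- (3, 3, 2)
SELECT tupleIntDivOrZero((15, 10), (4, 0));  -- (3, 0): integer division by zero yields 0
```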
virtual void setPassword(const String & /* password */) {} diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp new file mode 100644 index 00000000000..a411b4bb4b6 --- /dev/null +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -0,0 +1,354 @@ +#include +#include +#include +#include + +#include + +#include + +namespace DB +{ + +#if USE_LIBARCHIVE + +namespace ErrorCodes +{ + extern const int CANNOT_UNPACK_ARCHIVE; + extern const int LOGICAL_ERROR; + extern const int CANNOT_READ_ALL_DATA; + extern const int UNSUPPORTED_METHOD; +} + +class LibArchiveReader::Handle +{ +public: + explicit Handle(std::string path_to_archive_, bool lock_on_reading_) + : path_to_archive(path_to_archive_), lock_on_reading(lock_on_reading_) + { + current_archive = open(path_to_archive); + } + + Handle(const Handle &) = delete; + Handle(Handle && other) noexcept + : current_archive(other.current_archive) + , current_entry(other.current_entry) + , lock_on_reading(other.lock_on_reading) + { + other.current_archive = nullptr; + other.current_entry = nullptr; + } + + ~Handle() + { + close(current_archive); + } + + bool locateFile(const std::string & filename) + { + return locateFile([&](const std::string & file) { return file == filename; }); + } + + bool locateFile(NameFilter filter) + { + resetFileInfo(); + int err = ARCHIVE_OK; + while (true) + { + err = readNextHeader(current_archive, ¤t_entry); + + if (err == ARCHIVE_RETRY) + continue; + + if (err != ARCHIVE_OK) + break; + + if (filter(archive_entry_pathname(current_entry))) + return true; + } + + checkError(err); + return false; + } + + bool nextFile() + { + resetFileInfo(); + int err = ARCHIVE_OK; + do + { + err = readNextHeader(current_archive, ¤t_entry); + } while (err == ARCHIVE_RETRY); + + checkError(err); + return err == ARCHIVE_OK; + } + + std::vector getAllFiles(NameFilter filter) + { + auto * archive = open(path_to_archive); + SCOPE_EXIT( + close(archive); + ); + + struct archive_entry * entry = nullptr; + + std::vector files; + int error = readNextHeader(archive, &entry); + while (error == ARCHIVE_OK || error == ARCHIVE_RETRY) + { + chassert(entry != nullptr); + std::string name = archive_entry_pathname(entry); + if (!filter || filter(name)) + files.push_back(std::move(name)); + + error = readNextHeader(archive, &entry); + } + + checkError(error); + return files; + } + + const String & getFileName() const + { + chassert(current_entry); + if (!file_name) + file_name.emplace(archive_entry_pathname(current_entry)); + + return *file_name; + } + + const FileInfo & getFileInfo() const + { + chassert(current_entry); + if (!file_info) + { + file_info.emplace(); + file_info->uncompressed_size = archive_entry_size(current_entry); + file_info->compressed_size = archive_entry_size(current_entry); + file_info->is_encrypted = false; + } + + return *file_info; + } + + struct archive * current_archive; + struct archive_entry * current_entry = nullptr; +private: + void checkError(int error) const + { + if (error == ARCHIVE_FATAL) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Failed to read archive while fetching all files: {}", archive_error_string(current_archive)); + } + + void resetFileInfo() + { + file_name.reset(); + file_info.reset(); + } + + static struct archive * open(const String & path_to_archive) + { + auto * archive = archive_read_new(); + try + { + archive_read_support_filter_all(archive); + archive_read_support_format_all(archive); + if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != 
ARCHIVE_OK) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive {}: {}", quoteString(path_to_archive), archive_error_string(archive)); + } + catch (...) + { + close(archive); + throw; + } + + return archive; + } + + static void close(struct archive * archive) + { + if (archive) + { + archive_read_close(archive); + archive_read_free(archive); + } + } + + int readNextHeader(struct archive * archive, struct archive_entry ** entry) const + { + std::unique_lock lock(Handle::read_lock, std::defer_lock); + if (lock_on_reading) + lock.lock(); + + return archive_read_next_header(archive, entry); + } + + const String path_to_archive; + + /// for some archive types when we are reading headers static variables are used + /// which are not thread-safe + const bool lock_on_reading; + static inline std::mutex read_lock; + + mutable std::optional<String> file_name; + mutable std::optional<FileInfo> file_info; +}; + +class LibArchiveReader::FileEnumeratorImpl : public FileEnumerator +{ +public: + explicit FileEnumeratorImpl(Handle handle_) : handle(std::move(handle_)) {} + + const String & getFileName() const override { return handle.getFileName(); } + const FileInfo & getFileInfo() const override { return handle.getFileInfo(); } + bool nextFile() override { return handle.nextFile(); } + + /// Releases owned handle to pass it to a read buffer. + Handle releaseHandle() && { return std::move(handle); } +private: + Handle handle; +}; + +class LibArchiveReader::ReadBufferFromLibArchive : public ReadBufferFromFileBase +{ +public: + explicit ReadBufferFromLibArchive(Handle handle_, std::string path_to_archive_) + : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + , handle(std::move(handle_)) + , path_to_archive(std::move(path_to_archive_)) + {} + + off_t seek(off_t /* off */, int /* whence */) override + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Seek is not supported when reading from archive"); + } + + off_t getPosition() override + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive"); + } + + String getFileName() const override { return handle.getFileName(); } + + size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + + Handle releaseHandle() && + { + return std::move(handle); + } + +private: + bool nextImpl() override + { + auto bytes_read = archive_read_data(handle.current_archive, internal_buffer.begin(), static_cast(internal_buffer.size())); + + if (bytes_read < 0) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to read file {} from {}: {}", handle.getFileName(), path_to_archive, archive_error_string(handle.current_archive)); + + if (!bytes_read) + return false; + + total_bytes_read += bytes_read; + + working_buffer = internal_buffer; + working_buffer.resize(bytes_read); + return true; + } + + Handle handle; + const String path_to_archive; + size_t total_bytes_read = 0; +}; + +LibArchiveReader::LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_) + : archive_name(std::move(archive_name_)), lock_on_reading(lock_on_reading_), path_to_archive(std::move(path_to_archive_)) +{} + +LibArchiveReader::~LibArchiveReader() = default; + +const std::string & LibArchiveReader::getPath() const +{ + return path_to_archive; +} + +bool LibArchiveReader::fileExists(const String & filename) +{ + Handle handle(path_to_archive, lock_on_reading); + return handle.locateFile(filename); +} + +LibArchiveReader::FileInfo LibArchiveReader::getFileInfo(const String &
filename) +{ + Handle handle(path_to_archive, lock_on_reading); + if (!handle.locateFile(filename)) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: file not found", path_to_archive); + return handle.getFileInfo(); +} + +std::unique_ptr LibArchiveReader::firstFile() +{ + Handle handle(path_to_archive, lock_on_reading); + if (!handle.nextFile()) + return nullptr; + + return std::make_unique(std::move(handle)); +} + +std::unique_ptr LibArchiveReader::readFile(const String & filename, bool throw_on_not_found) +{ + return readFile([&](const std::string & file) { return file == filename; }, throw_on_not_found); +} + +std::unique_ptr LibArchiveReader::readFile(NameFilter filter, bool throw_on_not_found) +{ + Handle handle(path_to_archive, lock_on_reading); + if (!handle.locateFile(filter)) + { + if (throw_on_not_found) + throw Exception( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive); + return nullptr; + } + return std::make_unique(std::move(handle), path_to_archive); +} + +std::unique_ptr LibArchiveReader::readFile(std::unique_ptr enumerator) +{ + if (!dynamic_cast(enumerator.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong enumerator passed to readFile()"); + auto enumerator_impl = std::unique_ptr(static_cast(enumerator.release())); + auto handle = std::move(*enumerator_impl).releaseHandle(); + return std::make_unique(std::move(handle), path_to_archive); +} + +std::unique_ptr LibArchiveReader::nextFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); + auto read_buffer_from_libarchive = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_libarchive).releaseHandle(); + if (!handle.nextFile()) + return nullptr; + return std::make_unique(std::move(handle)); +} + +std::vector LibArchiveReader::getAllFiles() +{ + return getAllFiles({}); +} + +std::vector LibArchiveReader::getAllFiles(NameFilter filter) +{ + Handle handle(path_to_archive, lock_on_reading); + return handle.getAllFiles(filter); +} + +void LibArchiveReader::setPassword(const String & /*password_*/) +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to {} archive", archive_name); +} + +#endif + +} diff --git a/src/IO/Archives/LibArchiveReader.h b/src/IO/Archives/LibArchiveReader.h new file mode 100644 index 00000000000..3dadd710089 --- /dev/null +++ b/src/IO/Archives/LibArchiveReader.h @@ -0,0 +1,78 @@ +#pragma once + +#include "config.h" + +#include + + +namespace DB +{ + +#if USE_LIBARCHIVE + +class ReadBuffer; +class ReadBufferFromFileBase; +class SeekableReadBuffer; + +/// Implementation of IArchiveReader for reading archives using libarchive. +class LibArchiveReader : public IArchiveReader +{ +public: + ~LibArchiveReader() override; + + const std::string & getPath() const override; + + /// Returns true if there is a specified file in the archive. + bool fileExists(const String & filename) override; + + /// Returns the information about a file stored in the archive. + FileInfo getFileInfo(const String & filename) override; + + /// Starts enumerating files in the archive. + std::unique_ptr firstFile() override; + + /// Starts reading a file from the archive. The function returns a read buffer, + /// you can read that buffer to extract uncompressed data from the archive. + /// Several read buffers can be used at the same time in parallel. 
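libarchive exposes an archive as a forward-only stream, which is why every lookup above constructs a fresh Handle and scans headers from the beginning until the predicate matches. A toy sketch of that locate-by-scanning pattern; plain strings stand in for archive entries and none of the libarchive calls are used here.
```
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Sequentially scan entry names, as a forward-only archive reader has to,
// and stop at the first one accepted by the predicate.
std::optional<std::string> locateFirst(
    const std::vector<std::string> & entries,
    const std::function<bool(const std::string &)> & filter)
{
    for (const auto & name : entries)   // analogous to repeated readNextHeader() calls
        if (filter(name))
            return name;
    return std::nullopt;                // analogous to hitting end-of-archive without a match
}

int main()
{
    std::vector<std::string> entries{"a.bin", "b.txt", "c.txt"};
    if (auto found = locateFirst(entries, [](const std::string & n) { return n.ends_with(".txt"); }))
        std::cout << "found " << *found << '\n';
    else
        std::cout << "not found\n";
}
```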
+ std::unique_ptr readFile(const String & filename, bool throw_on_not_found) override; + std::unique_ptr readFile(NameFilter filter, bool throw_on_not_found) override; + + /// It's possible to convert a file enumerator to a read buffer and vice versa. + std::unique_ptr readFile(std::unique_ptr enumerator) override; + std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + + std::vector getAllFiles() override; + std::vector getAllFiles(NameFilter filter) override; + + /// Sets password used to decrypt the contents of the files in the archive. + void setPassword(const String & password_) override; + +protected: + /// Constructs an archive's reader that will read from a file in the local filesystem. + LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_); + +private: + class ReadBufferFromLibArchive; + class Handle; + class FileEnumeratorImpl; + + const std::string archive_name; + const bool lock_on_reading; + const String path_to_archive; +}; + +class TarArchiveReader : public LibArchiveReader +{ +public: + explicit TarArchiveReader(std::string path_to_archive) : LibArchiveReader("tar", /*lock_on_reading_=*/ true, std::move(path_to_archive)) { } +}; + +class SevenZipArchiveReader : public LibArchiveReader +{ +public: + explicit SevenZipArchiveReader(std::string path_to_archive) : LibArchiveReader("7z", /*lock_on_reading_=*/ false, std::move(path_to_archive)) { } +}; + +#endif + +} diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 206c2c45ee5..fd7a09c4f20 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -75,14 +75,35 @@ public: RawHandle getRawHandle() const { return raw_handle; } std::shared_ptr getReader() const { return reader; } - void locateFile(const String & file_name_) + bool locateFile(const String & file_name_) { resetFileInfo(); bool case_sensitive = true; int err = unzLocateFile(raw_handle, file_name_.c_str(), reinterpret_cast(static_cast(case_sensitive))); if (err == UNZ_END_OF_LIST_OF_FILE) - showError("File " + quoteString(file_name_) + " not found"); + return false; file_name = file_name_; + return true; + } + + bool locateFile(NameFilter filter) + { + int err = unzGoToFirstFile(raw_handle); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + + do + { + checkResult(err); + resetFileInfo(); + retrieveFileInfo(); + if (filter(getFileName())) + return true; + + err = unzGoToNextFile(raw_handle); + } while (err != UNZ_END_OF_LIST_OF_FILE); + + return false; } bool tryLocateFile(const String & file_name_) @@ -131,6 +152,27 @@ public: return *file_info; } + std::vector getAllFiles(NameFilter filter) + { + std::vector files; + resetFileInfo(); + int err = unzGoToFirstFile(raw_handle); + if (err == UNZ_END_OF_LIST_OF_FILE) + return files; + + do + { + checkResult(err); + resetFileInfo(); + retrieveFileInfo(); + if (!filter || filter(getFileName())) + files.push_back(*file_name); + err = unzGoToNextFile(raw_handle); + } while (err != UNZ_END_OF_LIST_OF_FILE); + + return files; + } + void closeFile() { int err = unzCloseCurrentFile(raw_handle); @@ -270,6 +312,8 @@ public: String getFileName() const override { return handle.getFileName(); } + size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + /// Releases owned handle to pass it to an enumerator. 
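Both readers convert between a file enumerator and a read buffer by moving one owned handle through an rvalue-qualified releaseHandle() member, so exactly one wrapper owns the underlying archive state at any moment. A small illustration of that move-only hand-off, assuming simplified Handle/Enumerator/Reader types that are not the real classes.
```
#include <iostream>
#include <string>
#include <utility>

// Move-only stand-in for an open archive handle.
struct Handle
{
    std::string name;
    explicit Handle(std::string name_) : name(std::move(name_)) {}
    Handle(Handle &&) = default;
    Handle & operator=(Handle &&) = default;
    Handle(const Handle &) = delete;
};

struct Enumerator
{
    explicit Enumerator(Handle h) : handle(std::move(h)) {}
    // Callable only on rvalues: the enumerator gives its handle away and must not be used afterwards.
    Handle releaseHandle() && { return std::move(handle); }
private:
    Handle handle;
};

struct Reader
{
    explicit Reader(Handle h) : handle(std::move(h)) {}
    const std::string & current() const { return handle.name; }
private:
    Handle handle;
};

int main()
{
    Enumerator enumerator{Handle{"archive.tar"}};
    Reader reader{std::move(enumerator).releaseHandle()};   // enumerator -> read buffer conversion
    std::cout << reader.current() << '\n';
}
```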
HandleHolder releaseHandle() && { @@ -459,6 +503,11 @@ ZipArchiveReader::~ZipArchiveReader() } } +const std::string & ZipArchiveReader::getPath() const +{ + return path_to_archive; +} + bool ZipArchiveReader::fileExists(const String & filename) { return acquireHandle().tryLocateFile(filename); @@ -467,7 +516,9 @@ bool ZipArchiveReader::fileExists(const String & filename) ZipArchiveReader::FileInfo ZipArchiveReader::getFileInfo(const String & filename) { auto handle = acquireHandle(); - handle.locateFile(filename); + if (!handle.locateFile(filename)) + showError(fmt::format("File {} was not found in archive", quoteString(filename))); + return handle.getFileInfo(); } @@ -479,10 +530,31 @@ std::unique_ptr ZipArchiveReader::firstFile() return std::make_unique(std::move(handle)); } -std::unique_ptr ZipArchiveReader::readFile(const String & filename) +std::unique_ptr ZipArchiveReader::readFile(const String & filename, bool throw_on_not_found) { auto handle = acquireHandle(); - handle.locateFile(filename); + if (!handle.locateFile(filename)) + { + if (throw_on_not_found) + showError(fmt::format("File {} was not found in archive", quoteString(filename))); + + return nullptr; + } + + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::readFile(NameFilter filter, bool throw_on_not_found) +{ + auto handle = acquireHandle(); + if (!handle.locateFile(filter)) + { + if (throw_on_not_found) + showError(fmt::format("No file satisfying filter in archive")); + + return nullptr; + } + return std::make_unique(std::move(handle)); } @@ -506,6 +578,17 @@ std::unique_ptr ZipArchiveReader::nextFile(std return std::make_unique(std::move(handle)); } +std::vector ZipArchiveReader::getAllFiles() +{ + return getAllFiles({}); +} + +std::vector ZipArchiveReader::getAllFiles(NameFilter filter) +{ + auto handle = acquireHandle(); + return handle.getAllFiles(filter); +} + void ZipArchiveReader::setPassword(const String & password_) { std::lock_guard lock{mutex}; diff --git a/src/IO/Archives/ZipArchiveReader.h b/src/IO/Archives/ZipArchiveReader.h index 9d0da28b080..a8788064fec 100644 --- a/src/IO/Archives/ZipArchiveReader.h +++ b/src/IO/Archives/ZipArchiveReader.h @@ -27,6 +27,8 @@ public: ~ZipArchiveReader() override; + const std::string & getPath() const override; + /// Returns true if there is a specified file in the archive. bool fileExists(const String & filename) override; @@ -39,12 +41,16 @@ public: /// Starts reading a file from the archive. The function returns a read buffer, /// you can read that buffer to extract uncompressed data from the archive. /// Several read buffers can be used at the same time in parallel. - std::unique_ptr readFile(const String & filename) override; + std::unique_ptr readFile(const String & filename, bool throw_on_not_found) override; + std::unique_ptr readFile(NameFilter filter, bool throw_on_not_found) override; /// It's possible to convert a file enumerator to a read buffer and vice versa. std::unique_ptr readFile(std::unique_ptr enumerator) override; std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + std::vector getAllFiles() override; + std::vector getAllFiles(NameFilter filter) override; + /// Sets password used to decrypt the contents of the files in the archive. 
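The readFile() overloads above take a throw_on_not_found flag: a missing entry either raises an error or comes back as an empty pointer the caller can test. A compact sketch of that contract, with an in-memory map and std::runtime_error standing in for the archive and the ClickHouse exception type.
```
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

std::map<std::string, std::string> contents{{"a.txt", "hello"}};

// Returns nullptr for a missing file unless the caller asked for an exception instead.
std::unique_ptr<std::string> readFile(const std::string & name, bool throw_on_not_found)
{
    auto it = contents.find(name);
    if (it == contents.end())
    {
        if (throw_on_not_found)
            throw std::runtime_error("File " + name + " was not found in archive");
        return nullptr;
    }
    return std::make_unique<std::string>(it->second);
}

int main()
{
    if (auto buf = readFile("missing.txt", /*throw_on_not_found=*/ false); !buf)
        std::cout << "not found, continuing\n";

    try
    {
        readFile("missing.txt", /*throw_on_not_found=*/ true);
    }
    catch (const std::exception & e)
    {
        std::cout << "error: " << e.what() << '\n';
    }
}
```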
void setPassword(const String & password_) override; diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp index 3cb4802792b..0c998971de1 100644 --- a/src/IO/Archives/createArchiveReader.cpp +++ b/src/IO/Archives/createArchiveReader.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -23,16 +24,47 @@ std::shared_ptr createArchiveReader( [[maybe_unused]] const std::function()> & archive_read_function, [[maybe_unused]] size_t archive_size) { + using namespace std::literals; + static constexpr std::array tar_extensions + { + ".tar"sv, + ".tar.gz"sv, + ".tgz"sv, + ".tar.zst"sv, + ".tzst"sv, + ".tar.xz"sv, + ".tar.bz2"sv + }; + if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) { #if USE_MINIZIP return std::make_shared(path_to_archive, archive_read_function, archive_size); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); +#endif + } + else if (std::any_of( + tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) + { +#if USE_LIBARCHIVE + return std::make_shared(path_to_archive); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled"); +#endif + } + else if (path_to_archive.ends_with(".7z")) + { +#if USE_LIBARCHIVE + return std::make_shared(path_to_archive); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled"); #endif } else + { throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive); + } } } diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index d30773f88f4..b6170d93d0c 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,7 @@ namespace ProfileEvents { extern const Event AsynchronousReadWaitMicroseconds; + extern const Event SynchronousReadWaitMicroseconds; extern const Event LocalReadThrottlerBytes; extern const Event LocalReadThrottlerSleepMicroseconds; } @@ -74,68 +76,43 @@ void AsynchronousReadBufferFromFileDescriptor::prefetch(Priority priority) bool AsynchronousReadBufferFromFileDescriptor::nextImpl() { + IAsynchronousReader::Result result; if (prefetch_future.valid()) { /// Read request already in flight. Wait for its completion. - size_t size = 0; - size_t offset = 0; - { - Stopwatch watch; - CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; - auto result = prefetch_future.get(); - ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); - size = result.size; - offset = result.offset; - assert(offset < size || size == 0); - } + CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; + ProfileEventTimeIncrement watch(ProfileEvents::AsynchronousReadWaitMicroseconds); + result = prefetch_future.get(); prefetch_future = {}; - file_offset_of_buffer_end += size; - - assert(offset <= size); - size_t bytes_read = size - offset; - if (throttler) - throttler->add(bytes_read, ProfileEvents::LocalReadThrottlerBytes, ProfileEvents::LocalReadThrottlerSleepMicroseconds); - - if (bytes_read) - { + if (result.size - result.offset > 0) prefetch_buffer.swap(memory); - /// Adjust the working buffer so that it ignores `offset` bytes. 
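The createArchiveReader() change earlier in this hunk picks a backend purely from the file name: .zip/.zipx go to minizip, a small table of tar-style extensions and .7z go to libarchive. A sketch of the same extension dispatch, with strings returned in place of reader objects.
```
#include <algorithm>
#include <array>
#include <iostream>
#include <string>
#include <string_view>

// Pick an archive backend from the file name, mirroring the dispatch in createArchiveReader.
std::string detectArchiveKind(const std::string & path)
{
    using namespace std::literals;
    static constexpr std::array tar_extensions{".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv, ".tar.bz2"sv};

    if (path.ends_with(".zip") || path.ends_with(".zipx"))
        return "zip";
    if (std::any_of(tar_extensions.begin(), tar_extensions.end(),
                    [&](std::string_view ext) { return path.ends_with(ext); }))
        return "tar";
    if (path.ends_with(".7z"))
        return "7z";
    return "unknown";
}

int main()
{
    for (const auto * path : {"backup.tar.zst", "data.zipx", "image.7z", "notes.txt"})
        std::cout << path << " -> " << detectArchiveKind(path) << '\n';
}
```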
- internal_buffer = Buffer(memory.data(), memory.data() + memory.size()); - working_buffer = Buffer(memory.data() + offset, memory.data() + size); - pos = working_buffer.begin(); - return true; - } - - return false; } else { /// No pending request. Do synchronous read. - Stopwatch watch; - auto [size, offset, _] = asyncReadInto(memory.data(), memory.size(), DEFAULT_PREFETCH_PRIORITY).get(); - ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); - - file_offset_of_buffer_end += size; - - assert(offset <= size); - size_t bytes_read = size - offset; - if (throttler) - throttler->add(bytes_read, ProfileEvents::LocalReadThrottlerBytes, ProfileEvents::LocalReadThrottlerSleepMicroseconds); - - if (bytes_read) - { - /// Adjust the working buffer so that it ignores `offset` bytes. - internal_buffer = Buffer(memory.data(), memory.data() + memory.size()); - working_buffer = Buffer(memory.data() + offset, memory.data() + size); - pos = working_buffer.begin(); - return true; - } - - return false; + ProfileEventTimeIncrement watch(ProfileEvents::SynchronousReadWaitMicroseconds); + result = asyncReadInto(memory.data(), memory.size(), DEFAULT_PREFETCH_PRIORITY).get(); } + + chassert(result.size >= result.offset); + size_t bytes_read = result.size - result.offset; + file_offset_of_buffer_end += result.size; + + if (throttler) + throttler->add(result.size, ProfileEvents::LocalReadThrottlerBytes, ProfileEvents::LocalReadThrottlerSleepMicroseconds); + + if (bytes_read) + { + /// Adjust the working buffer so that it ignores `offset` bytes. + internal_buffer = Buffer(memory.data(), memory.data() + memory.size()); + working_buffer = Buffer(memory.data() + result.offset, memory.data() + result.size); + pos = working_buffer.begin(); + } + + return bytes_read; } diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index 6b8b93fcc20..279a399caad 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -64,7 +64,7 @@ public: /// Optional. Useful when implementation needs to do ignore(). size_t offset = 0; - std::unique_ptr execution_watch; + std::unique_ptr execution_watch = {}; operator std::tuple() { return {size, offset}; } }; @@ -74,6 +74,7 @@ public: /// or destroy the whole reader before destroying the buffer for request. /// The method can be called concurrently from multiple threads. 
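The rewritten nextImpl() above funnels the prefetch path and the synchronous path into one Result{size, offset} value and then exposes only the bytes past offset through the working buffer. A tiny sketch of that adjustment, assuming an illustrative Result struct and usablePart() helper rather than the real buffer classes.
```
#include <cassert>
#include <cstddef>
#include <iostream>
#include <span>
#include <string>
#include <vector>

// Result shape used by the asynchronous reader: `size` bytes are valid in the buffer,
// and the first `offset` of them must be skipped (they were read only to satisfy an ignore).
struct Result
{
    size_t size = 0;
    size_t offset = 0;
};

// Mirrors the working-buffer adjustment: expose only the usable tail of the buffer.
std::span<const char> usablePart(const std::vector<char> & memory, Result result)
{
    assert(result.offset <= result.size && result.size <= memory.size());
    return {memory.data() + result.offset, result.size - result.offset};
}

int main()
{
    std::vector<char> memory{'i', 'g', 'n', 'o', 'r', 'e', 'd', 'a', 't', 'a'};
    auto window = usablePart(memory, Result{.size = 10, .offset = 6});
    std::cout << std::string(window.begin(), window.end()) << '\n';   // prints "data"
}
```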
virtual std::future submit(Request request) = 0; + virtual Result execute(Request request) = 0; virtual void wait() = 0; diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index ddd7ccbe483..add3e96c2c1 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -137,7 +137,12 @@ namespace throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString()); } - HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive) + HTTPSessionPtr makeHTTPSessionImpl( + const std::string & host, + UInt16 port, + bool https, + bool keep_alive, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config = {}) { HTTPSessionPtr session; @@ -158,6 +163,9 @@ namespace /// doesn't work properly without patch session->setKeepAlive(keep_alive); + + session->setProxyConfig(proxy_config); + return session; } @@ -333,13 +341,17 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_ response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds())); } -HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts) +HTTPSessionPtr makeHTTPSession( + const Poco::URI & uri, + const ConnectionTimeouts & timeouts, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config +) { const std::string & host = uri.getHost(); UInt16 port = uri.getPort(); bool https = isHTTPS(uri); - auto session = makeHTTPSessionImpl(host, port, https, false); + auto session = makeHTTPSessionImpl(host, port, https, false, proxy_config); setTimeouts(*session, timeouts); return session; } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 082491b2851..04ca85925af 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -63,6 +62,7 @@ struct HTTPSessionReuseTag { }; +void markSessionForReuse(Poco::Net::HTTPSession & session); void markSessionForReuse(HTTPSessionPtr session); void markSessionForReuse(PooledHTTPSessionPtr session); @@ -70,7 +70,11 @@ void markSessionForReuse(PooledHTTPSessionPtr session); void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. -HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts); +HTTPSessionPtr makeHTTPSession( + const Poco::URI & uri, + const ConnectionTimeouts & timeouts, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config = {} +); /// As previous method creates session, but tooks it from pool, without and with proxy uri. PooledHTTPSessionPtr makePooledHTTPSession( diff --git a/src/IO/IResourceManager.h b/src/IO/IResourceManager.h index f084a903cb1..39fede0e19f 100644 --- a/src/IO/IResourceManager.h +++ b/src/IO/IResourceManager.h @@ -7,7 +7,7 @@ #include #include -#include +#include namespace DB { @@ -23,7 +23,7 @@ class IClassifier : private boost::noncopyable public: virtual ~IClassifier() {} - /// Returns ResouceLink that should be used to access resource. + /// Returns ResourceLink that should be used to access resource. /// Returned link is valid until classifier destruction. virtual ResourceLink get(const String & resource_name) = 0; }; @@ -46,6 +46,10 @@ public: /// Obtain a classifier instance required to get access to resources. /// Note that it holds resource configuration, so should be destructed when query is done. 
virtual ClassifierPtr acquire(const String & classifier_name) = 0; + + /// For introspection, see `system.scheduler` table + using VisitorFunc = std::function; + virtual void forEachNode(VisitorFunc visitor) = 0; }; using ResourceManagerPtr = std::shared_ptr; diff --git a/src/IO/ISchedulerConstraint.h b/src/IO/ISchedulerConstraint.h index 47f6905e265..05bed7c3df8 100644 --- a/src/IO/ISchedulerConstraint.h +++ b/src/IO/ISchedulerConstraint.h @@ -21,7 +21,7 @@ namespace DB class ISchedulerConstraint : public ISchedulerNode { public: - ISchedulerConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + explicit ISchedulerConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) : ISchedulerNode(event_queue_, config, config_prefix) {} @@ -44,6 +44,9 @@ public: } } + /// For introspection of current state (true = satisfied, false = violated) + virtual bool isSatisfied() = 0; + protected: // Reference to nearest parent that is also derived from ISchedulerConstraint. // Request can traverse through multiple constraints while being dequeue from hierarchy, diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h index 1c33c033744..5cf1ae94216 100644 --- a/src/IO/ISchedulerNode.h +++ b/src/IO/ISchedulerNode.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -68,6 +69,13 @@ struct SchedulerNodeInfo { priority.value = value; } + + // To check if configuration update required + bool equals(const SchedulerNodeInfo & o) const + { + // `parent` data is not compared intentionally (it is not part of configuration settings) + return weight == o.weight && priority == o.priority; + } }; /* @@ -157,8 +165,11 @@ public: virtual ~ISchedulerNode() {} - // Checks if two nodes configuration is equal - virtual bool equals(ISchedulerNode * other) = 0; + /// Checks if two nodes configuration is equal + virtual bool equals(ISchedulerNode * other) + { + return info.equals(other->info); + } /// Attach new child virtual void attachChild(const std::shared_ptr & child) = 0; @@ -176,7 +187,10 @@ public: /// Returns true iff node is active virtual bool isActive() = 0; - /// Returns the first request to be executed as the first component of resuting pair. + /// Returns number of active children + virtual size_t activeChildren() = 0; + + /// Returns the first request to be executed as the first component of resulting pair. /// The second pair component is `true` iff node is still active after dequeueing. virtual std::pair dequeueRequest() = 0; @@ -215,6 +229,11 @@ public: String basename; SchedulerNodeInfo info; ISchedulerNode * parent = nullptr; + + /// Introspection + std::atomic dequeued_requests{0}; + std::atomic dequeued_cost{0}; + std::atomic busy_periods{0}; }; using SchedulerNodePtr = std::shared_ptr; diff --git a/src/IO/ISchedulerQueue.h b/src/IO/ISchedulerQueue.h index fc2f3943d26..2e190a529e3 100644 --- a/src/IO/ISchedulerQueue.h +++ b/src/IO/ISchedulerQueue.h @@ -50,6 +50,12 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; + /// For introspection + ResourceCost getBudget() const + { + return budget.get(); + } + private: // Allows multiple consumers to synchronize with common "debit/credit" balance. 
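The scheduler nodes in this patch gain lightweight introspection state: atomic dequeued_requests/dequeued_cost counters, a busy_periods counter bumped when a queue drains, and a queue_cost total kept under the queue mutex. A condensed sketch of that accounting; the ToyQueue below is single-threaded and omits the real locking and request types.
```
#include <atomic>
#include <cstdint>
#include <deque>
#include <iostream>

// Toy queue node with the same style of introspection counters the scheduler nodes keep.
struct ToyQueue
{
    std::deque<int64_t> requests;          // each element is a request cost
    int64_t queue_cost = 0;

    std::atomic<uint64_t> dequeued_requests{0};
    std::atomic<int64_t> dequeued_cost{0};
    std::atomic<uint64_t> busy_periods{0};

    void enqueue(int64_t cost)
    {
        queue_cost += cost;
        requests.push_back(cost);
    }

    bool dequeue()
    {
        if (requests.empty())
            return false;
        int64_t cost = requests.front();
        requests.pop_front();
        queue_cost -= cost;
        if (requests.empty())
            ++busy_periods;                // a busy period ends when the queue drains
        ++dequeued_requests;
        dequeued_cost += cost;
        return true;
    }
};

int main()
{
    ToyQueue queue;
    queue.enqueue(10);
    queue.enqueue(5);
    while (queue.dequeue()) {}
    std::cout << "requests=" << queue.dequeued_requests
              << " cost=" << queue.dequeued_cost
              << " busy_periods=" << queue.busy_periods << '\n';
}
```
A visitor such as the forEachNode() hook can then read these counters to populate an introspection table without pausing the scheduler.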
// 1) (positive) to avoid wasting of allocated but not used resource (e.g in case of a failure); diff --git a/src/IO/MMappedFileCache.h b/src/IO/MMappedFileCache.h index 0a8a80d15d0..bb30829ed69 100644 --- a/src/IO/MMappedFileCache.h +++ b/src/IO/MMappedFileCache.h @@ -33,15 +33,12 @@ public: /// Calculate key from path to file and offset. static UInt128 hash(const String & path_to_file, size_t offset, ssize_t length = -1) { - UInt128 key; - SipHash hash; hash.update(path_to_file.data(), path_to_file.size() + 1); hash.update(offset); hash.update(length); - hash.get128(key); - return key; + return hash.get128(); } template diff --git a/src/IO/MySQLPacketPayloadReadBuffer.cpp b/src/IO/MySQLPacketPayloadReadBuffer.cpp index ab58624d0fa..2c5167ed038 100644 --- a/src/IO/MySQLPacketPayloadReadBuffer.cpp +++ b/src/IO/MySQLPacketPayloadReadBuffer.cpp @@ -45,6 +45,9 @@ bool MySQLPacketPayloadReadBuffer::nextImpl() } in.nextIfAtEnd(); + /// Don't return a buffer when no bytes available + if (!in.hasPendingData()) + return false; working_buffer = ReadBuffer::Buffer(in.position(), in.buffer().end()); size_t count = std::min(in.available(), payload_length - offset); working_buffer.resize(count); diff --git a/src/IO/OpenedFileCache.h b/src/IO/OpenedFileCache.h index 61e502a494b..2cecc675af7 100644 --- a/src/IO/OpenedFileCache.h +++ b/src/IO/OpenedFileCache.h @@ -4,14 +4,18 @@ #include #include -#include #include +#include +#include + +#include namespace ProfileEvents { extern const Event OpenedFileCacheHits; extern const Event OpenedFileCacheMisses; + extern const Event OpenedFileCacheMicroseconds; } namespace DB @@ -26,57 +30,79 @@ namespace DB */ class OpenedFileCache { -private: - using Key = std::pair; + class OpenedFileMap + { + using Key = std::pair; - using OpenedFileWeakPtr = std::weak_ptr; - using Files = std::map; + using OpenedFileWeakPtr = std::weak_ptr; + using Files = std::map; - Files files; - std::mutex mutex; + Files files; + std::mutex mutex; + + public: + using OpenedFilePtr = std::shared_ptr; + + OpenedFilePtr get(const std::string & path, int flags) + { + Key key(path, flags); + + std::lock_guard lock(mutex); + + auto [it, inserted] = files.emplace(key, OpenedFilePtr{}); + if (!inserted) + { + if (auto res = it->second.lock()) + { + ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits); + return res; + } + } + ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses); + + OpenedFilePtr res + { + new OpenedFile(path, flags), + [key, this](auto ptr) + { + { + std::lock_guard another_lock(mutex); + files.erase(key); + } + delete ptr; + } + }; + + it->second = res; + return res; + } + + void remove(const std::string & path, int flags) + { + Key key(path, flags); + std::lock_guard lock(mutex); + files.erase(key); + } + }; + + static constexpr size_t buckets = 1024; + std::vector impls{buckets}; public: - using OpenedFilePtr = std::shared_ptr; + using OpenedFilePtr = OpenedFileMap::OpenedFilePtr; OpenedFilePtr get(const std::string & path, int flags) { - Key key(path, flags); - - std::lock_guard lock(mutex); - - auto [it, inserted] = files.emplace(key, OpenedFilePtr{}); - if (!inserted) - { - if (auto res = it->second.lock()) - { - ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits); - return res; - } - } - ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses); - - OpenedFilePtr res - { - new OpenedFile(path, flags), - [key, this](auto ptr) - { - { - std::lock_guard another_lock(mutex); - files.erase(key); - } - delete ptr; - } - }; - - it->second = res; - 
return res; + ProfileEventTimeIncrement watch(ProfileEvents::OpenedFileCacheMicroseconds); + const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets; + return impls[bucket].get(path, flags); } void remove(const std::string & path, int flags) { - Key key(path, flags); - std::lock_guard lock(mutex); - files.erase(key); + ProfileEventTimeIncrement watch(ProfileEvents::OpenedFileCacheMicroseconds); + const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets; + impls[bucket].remove(path, flags); } static OpenedFileCache & instance() @@ -87,5 +113,4 @@ public: }; using OpenedFileCachePtr = std::shared_ptr; - } diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 92af1ed0b04..8d73f221748 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -8,7 +8,7 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; + extern const int UNEXPECTED_END_OF_FILE; extern const int CANNOT_SEEK_THROUGH_FILE; extern const int SEEK_POSITION_OUT_OF_BOUND; @@ -260,7 +260,7 @@ void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) if (!on_progress(r) && r < read_worker->segment.size()) throw Exception( - ErrorCodes::LOGICAL_ERROR, + ErrorCodes::UNEXPECTED_END_OF_FILE, "Failed to read all the data from the reader at offset {}, got {}/{} bytes", read_worker->start_offset, r, read_worker->segment.size()); } diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp index bf42cdf91d6..1dcb206496f 100644 --- a/src/IO/Progress.cpp +++ b/src/IO/Progress.cpp @@ -69,12 +69,14 @@ void ProgressValues::write(WriteBuffer & out, UInt64 client_revision) const } } -void ProgressValues::writeJSON(WriteBuffer & out) const +void ProgressValues::writeJSON(WriteBuffer & out, bool add_braces) const { /// Numbers are written in double quotes (as strings) to avoid loss of precision /// of 64-bit integers after interpretation by JavaScript. 
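The OpenedFileCache rework above replaces one global map and mutex with 1024 hash-addressed buckets, so lookups for unrelated paths no longer contend on a single lock. A reduced sketch of that lock striping, with std::hash standing in for the CityHash64 call and a per-path counter standing in for the opened-file map.
```
#include <functional>
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <vector>

// Hash-sharded map: each bucket has its own mutex, so unrelated keys don't share one lock.
class ShardedCounter
{
    struct Bucket
    {
        std::mutex mutex;
        std::map<std::string, int> values;
    };

    static constexpr size_t buckets = 16;
    std::vector<Bucket> impls{buckets};

    Bucket & bucketFor(const std::string & key)
    {
        return impls[std::hash<std::string>{}(key) % buckets];
    }

public:
    void increment(const std::string & key)
    {
        auto & bucket = bucketFor(key);
        std::lock_guard lock(bucket.mutex);
        ++bucket.values[key];
    }

    int get(const std::string & key)
    {
        auto & bucket = bucketFor(key);
        std::lock_guard lock(bucket.mutex);
        return bucket.values[key];
    }
};

int main()
{
    ShardedCounter counter;
    counter.increment("/var/lib/data/part_1.bin");
    counter.increment("/var/lib/data/part_1.bin");
    std::cout << counter.get("/var/lib/data/part_1.bin") << '\n';
}
```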
- writeCString("{\"read_rows\":\"", out); + if (add_braces) + writeCString("{", out); + writeCString("\"read_rows\":\"", out); writeText(read_rows, out); writeCString("\",\"read_bytes\":\"", out); writeText(read_bytes, out); @@ -88,7 +90,11 @@ void ProgressValues::writeJSON(WriteBuffer & out) const writeText(result_rows, out); writeCString("\",\"result_bytes\":\"", out); writeText(result_bytes, out); - writeCString("\"}", out); + writeCString("\",\"elapsed_ns\":\"", out); + writeText(elapsed_ns, out); + writeCString("\"", out); + if (add_braces) + writeCString("}", out); } bool Progress::incrementPiecewiseAtomically(const Progress & rhs) @@ -230,9 +236,14 @@ void Progress::write(WriteBuffer & out, UInt64 client_revision) const getValues().write(out, client_revision); } -void Progress::writeJSON(WriteBuffer & out) const +void Progress::writeJSON(WriteBuffer & out, bool add_braces) const { - getValues().writeJSON(out); + getValues().writeJSON(out, add_braces); +} + +void Progress::incrementElapsedNs(UInt64 elapsed_ns_) +{ + elapsed_ns.fetch_add(elapsed_ns_, std::memory_order_relaxed); } } diff --git a/src/IO/Progress.h b/src/IO/Progress.h index c21b1b854b0..288598cae78 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -32,7 +32,7 @@ struct ProgressValues void read(ReadBuffer & in, UInt64 server_revision); void write(WriteBuffer & out, UInt64 client_revision) const; - void writeJSON(WriteBuffer & out) const; + void writeJSON(WriteBuffer & out, bool add_braces = true) const; }; struct ReadProgress @@ -40,9 +40,10 @@ struct ReadProgress UInt64 read_rows = 0; UInt64 read_bytes = 0; UInt64 total_rows_to_read = 0; + UInt64 total_bytes_to_read = 0; - ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0) - : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {} + ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0) + : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {} }; struct WriteProgress @@ -98,8 +99,8 @@ struct Progress Progress() = default; - Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0) - : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {} + Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0) + : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {} explicit Progress(ReadProgress read_progress) : read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {} @@ -118,11 +119,13 @@ struct Progress void write(WriteBuffer & out, UInt64 client_revision) const; /// Progress in JSON format (single line, without whitespaces) is used in HTTP headers. - void writeJSON(WriteBuffer & out) const; + void writeJSON(WriteBuffer & out, bool add_braces = true) const; /// Each value separately is changed atomically (but not whole object). 
bool incrementPiecewiseAtomically(const Progress & rhs); + void incrementElapsedNs(UInt64 elapsed_ns_); + void reset(); ProgressValues getValues() const; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index a4ae12f5069..b45bc8f3dbc 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -18,7 +18,6 @@ namespace ErrorCodes { extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int CANNOT_READ_ALL_DATA; - extern const int NOT_IMPLEMENTED; } static constexpr auto DEFAULT_PREFETCH_PRIORITY = Priority{0}; @@ -236,14 +235,6 @@ public: virtual void setReadUntilEnd() {} - /// Read at most `size` bytes into data at specified offset `offset`. First ignore `ignore` bytes if `ignore` > 0. - /// Notice: this function only need to be implemented in synchronous read buffers to be wrapped in asynchronous read. - /// Such as ReadBufferFromRemoteFSGather and AsynchronousReadIndirectBufferFromRemoteFS. - virtual IAsynchronousReader::Result readInto(char * /*data*/, size_t /*size*/, size_t /*offset*/, size_t /*ignore*/) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "readInto not implemented"); - } - protected: /// The number of bytes to ignore from the initial position of `working_buffer` /// buffer. Apparently this is an additional out-parameter for nextImpl(), diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 4181615bc52..4ac3f984f78 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -42,7 +42,7 @@ void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) setProfileCallback([file_progress_callback](const ProfileInfo & progress) { - file_progress_callback(FileProgress(progress.bytes_read, 0)); + file_progress_callback(FileProgress(progress.bytes_read)); }); } diff --git a/src/IO/ReadBufferFromIStream.h b/src/IO/ReadBufferFromIStream.h index 67cc60c053f..8c3f62728b5 100644 --- a/src/IO/ReadBufferFromIStream.h +++ b/src/IO/ReadBufferFromIStream.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 5c562d32fbc..c038523bdaa 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -40,9 +40,15 @@ DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_resul { if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) return static_cast(session_aware_stream->getSession()); - else if (!dynamic_cast *>(&read_result.GetBody())) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); - return {}; + + if (dynamic_cast *>(&read_result.GetBody())) + return {}; + + /// accept result from S# mock in gtest_writebuffer_s3.cpp + if (dynamic_cast(&read_result.GetBody())) + return {}; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); } void resetSession(Aws::S3::Model::GetObjectResult & read_result) @@ -215,13 +221,12 @@ bool ReadBufferFromS3::nextImpl() size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) { - if (n == 0) - return 0; - + size_t initial_n = n; size_t sleep_time_with_backoff_milliseconds = 100; - for (size_t attempt = 0;; ++attempt) + for (size_t attempt = 0; n > 0; ++attempt) { bool last_attempt = attempt + 1 >= request_settings.max_single_read_retries; + size_t bytes_copied = 0; ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromS3Microseconds); @@ -230,14 +235,12 @@ size_t 
ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons auto result = sendRequest(range_begin, range_begin + n - 1); std::istream & istr = result.GetBody(); - size_t bytes = copyFromIStreamWithProgressCallback(istr, to, n, progress_callback); + copyFromIStreamWithProgressCallback(istr, to, n, progress_callback, &bytes_copied); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes); + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes_copied); if (read_settings.remote_throttler) - read_settings.remote_throttler->add(bytes, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); - - return bytes; + read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); } catch (Poco::Exception & e) { @@ -247,7 +250,13 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons sleepForMilliseconds(sleep_time_with_backoff_milliseconds); sleep_time_with_backoff_milliseconds *= 2; } + + range_begin += bytes_copied; + to += bytes_copied; + n -= bytes_copied; } + + return initial_n; } bool ReadBufferFromS3::processException(Poco::Exception & e, size_t read_offset, size_t attempt) const @@ -260,6 +269,7 @@ bool ReadBufferFromS3::processException(Poco::Exception & e, size_t read_offset, "Attempt: {}, Message: {}", bucket, key, version_id.empty() ? "Latest" : version_id, read_offset, attempt, e.message()); + if (auto * s3_exception = dynamic_cast(&e)) { /// It doesn't make sense to retry Access Denied or No Such Key diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index d58971bea5b..0835e52a5b2 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -16,11 +16,6 @@ #include -namespace Aws::S3 -{ -class Client; -} - namespace DB { /** diff --git a/src/IO/ReadBufferFromString.h b/src/IO/ReadBufferFromString.h index 7ea6afc3543..f20e319b931 100644 --- a/src/IO/ReadBufferFromString.h +++ b/src/IO/ReadBufferFromString.h @@ -19,7 +19,10 @@ public: class ReadBufferFromOwnString : public String, public ReadBufferFromString { public: - explicit ReadBufferFromOwnString(const String & s_): String(s_), ReadBufferFromString(*this) {} + template + explicit ReadBufferFromOwnString(S && s_) : String(std::forward(s_)), ReadBufferFromString(*this) + { + } }; } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9896468e616..bf3215d5823 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -12,6 +12,8 @@ #include #include +#include + #ifdef __SSE2__ #include #endif @@ -51,36 +53,25 @@ UUID parseUUID(std::span src) { UUID uuid; const auto * src_ptr = src.data(); - auto * dst = reinterpret_cast(&uuid); const auto size = src.size(); #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const std::reverse_iterator dst_it(dst + sizeof(UUID)); + const std::reverse_iterator dst(reinterpret_cast(&uuid) + sizeof(UUID)); +#else + auto * dst = reinterpret_cast(&uuid); #endif if (size == 36) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - parseHex<4>(src_ptr, dst_it + 8); - parseHex<2>(src_ptr + 9, dst_it + 12); - parseHex<2>(src_ptr + 14, dst_it + 14); - parseHex<2>(src_ptr + 19, dst_it); - parseHex<6>(src_ptr + 24, dst_it + 2); -#else - parseHex<4>(src_ptr, dst); - parseHex<2>(src_ptr + 9, dst + 4); - parseHex<2>(src_ptr + 14, dst + 6); - parseHex<2>(src_ptr + 19, dst + 8); - parseHex<6>(src_ptr + 24, dst + 10); -#endif + parseHex<4>(src_ptr, dst + 8); + parseHex<2>(src_ptr + 9, 
dst + 12); + parseHex<2>(src_ptr + 14, dst + 14); + parseHex<2>(src_ptr + 19, dst); + parseHex<6>(src_ptr + 24, dst + 2); } else if (size == 32) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - parseHex<8>(src_ptr, dst_it + 8); - parseHex<8>(src_ptr + 16, dst_it); -#else - parseHex<16>(src_ptr, dst); -#endif + parseHex<8>(src_ptr, dst + 8); + parseHex<8>(src_ptr + 16, dst); } else throw Exception(ErrorCodes::CANNOT_PARSE_UUID, "Unexpected length when trying to parse UUID ({})", size); @@ -819,14 +810,11 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & auto rc = vdupq_n_u8('\r'); auto nc = vdupq_n_u8('\n'); auto dc = vdupq_n_u8(delimiter); - /// Returns a 64 bit mask of nibbles (4 bits for each byte). - auto get_nibble_mask = [](uint8x16_t input) -> uint64_t - { return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); }; for (; next_pos + 15 < buf.buffer().end(); next_pos += 16) { uint8x16_t bytes = vld1q_u8(reinterpret_cast(next_pos)); auto eq = vorrq_u8(vorrq_u8(vceqq_u8(bytes, rc), vceqq_u8(bytes, nc)), vceqq_u8(bytes, dc)); - uint64_t bit_mask = get_nibble_mask(eq); + uint64_t bit_mask = getNibbleMask(eq); if (bit_mask) { next_pos += std::countr_zero(bit_mask) >> 2; @@ -1354,7 +1342,7 @@ Exception readException(ReadBuffer & buf, const String & additional_message, boo String stack_trace; bool has_nested = false; /// Obsolete - readBinary(code, buf); + readBinaryLittleEndian(code, buf); readBinary(name, buf); readBinary(message, buf); readBinary(stack_trace, buf); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 2636898c1b3..f99c78fdf16 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -116,6 +116,13 @@ inline void readPODBinary(T & x, ReadBuffer & buf) buf.readStrict(reinterpret_cast(&x), sizeof(x)); /// NOLINT } +inline void readUUIDBinary(UUID & x, ReadBuffer & buf) +{ + auto & uuid = x.toUnderType(); + readPODBinary(uuid.items[0], buf); + readPODBinary(uuid.items[1], buf); +} + template inline void readIntBinary(T & x, ReadBuffer & buf) { @@ -529,6 +536,11 @@ void tryReadIntTextUnsafe(T & x, ReadBuffer & buf) template void readFloatText(T & x, ReadBuffer & in); template bool tryReadFloatText(T & x, ReadBuffer & in); +template void readFloatTextPrecise(T & x, ReadBuffer & in); +template bool tryReadFloatTextPrecise(T & x, ReadBuffer & in); +template void readFloatTextFast(T & x, ReadBuffer & in); +template bool tryReadFloatTextFast(T & x, ReadBuffer & in); + /// simple: all until '\n' or '\t' void readString(String & s, ReadBuffer & buf); @@ -1096,24 +1108,31 @@ inline void readBinary(bool & x, ReadBuffer & buf) } inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); } -inline void readBinary(Int32 & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(Int128 & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(Int256 & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(UInt32 & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(UInt256 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Decimal32 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Decimal64 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Decimal256 & x, ReadBuffer & buf) { 
readPODBinary(x.value, buf); } inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); } +inline void readBinary(IPv4 & x, ReadBuffer & buf) { readPODBinary(x, buf); } +inline void readBinary(IPv6 & x, ReadBuffer & buf) { readPODBinary(x, buf); } + +inline void readBinary(UUID & x, ReadBuffer & buf) +{ + readUUIDBinary(x, buf); +} + +inline void readBinary(CityHash_v1_0_2::uint128 & x, ReadBuffer & buf) +{ + readPODBinary(x.low64, buf); + readPODBinary(x.high64, buf); +} inline void readBinary(StackTrace::FramePointers & x, ReadBuffer & buf) { readPODBinary(x, buf); } template inline void readBinaryEndian(T & x, ReadBuffer & buf) { - readPODBinary(x, buf); + readBinary(x, buf); transformEndianness(x); } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 6d1c0f7aafa..d0384cfa524 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -40,7 +40,12 @@ void UpdatableSession::updateSession(const Poco::URI & uri) if (redirects <= max_redirects) session = session_factory->buildNewSession(uri); else - throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString()); + throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, + "Too many redirects while trying to access {}." + " You can {} redirects by changing the setting 'max_http_get_redirects'." + " Example: `SET max_http_get_redirects = 10`." + " Redirects are restricted to prevent possible attack when a malicious server redirects to an internal resource, bypassing the authentication or firewall.", + initial_uri.toString(), max_redirects ? "increase the allowed maximum number of" : "allow"); } template @@ -245,7 +250,8 @@ ReadWriteBufferFromHTTPBase::ReadWriteBufferFromHTTPBase( bool delay_initialization, bool use_external_buffer_, bool http_skip_not_found_url_, - std::optional file_info_) + std::optional file_info_, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config_) : SeekableReadBuffer(nullptr, 0) , uri {uri_} , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} @@ -260,6 +266,7 @@ ReadWriteBufferFromHTTPBase::ReadWriteBufferFromHTTPBase( , http_skip_not_found_url(http_skip_not_found_url_) , settings {settings_} , log(&Poco::Logger::get("ReadWriteBufferFromHTTP")) + , proxy_config(proxy_config_) { if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0 || settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms) @@ -305,12 +312,12 @@ void ReadWriteBufferFromHTTPBase::callWithRedirects(Poco::N current_session = session; call(current_session, response, method_, throw_on_all_errors, for_object_info); - Poco::URI prev_uri = uri; + saved_uri_redirect = uri; while (isRedirect(response.getStatus())) { - Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response); - prev_uri = uri_redirect; + Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response); + saved_uri_redirect = uri_redirect; if (remote_host_filter) remote_host_filter->checkURL(uri_redirect); @@ -587,16 +594,14 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si /// This ensures we've sent at least one HTTP request and populated saved_uri_redirect. 
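Both readBigAt() implementations in this patch now resume after partial copies: whatever was copied before a failure advances the offset, the destination pointer, and the remaining byte count, so a retry never re-reads data already handed to the progress callback. A self-contained sketch of that loop shape; the flaky 4-bytes-per-call source is invented for the demo, and unlike the real code the sketch does not cap the number of attempts or rethrow on the last one.
```
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>

// Flaky source standing in for a ranged HTTP/S3 read: copies at most `limit` bytes per call,
// reports them through the out-parameter, then "fails" if the request wasn't fully served.
void flakyReadAt(const std::string & source, char * to, size_t n, size_t offset, size_t limit, size_t & bytes_copied)
{
    bytes_copied = std::min({n, limit, source.size() - offset});
    std::memcpy(to, source.data() + offset, bytes_copied);
    if (bytes_copied < n)
        throw std::runtime_error("connection reset");
}

// On every attempt, advance offset/to/n by whatever was copied before the failure,
// so retries never re-read bytes already delivered to the caller.
size_t readBigAt(const std::string & source, char * to, size_t n, size_t offset)
{
    size_t initial_n = n;
    for (size_t attempt = 0; n > 0; ++attempt)
    {
        size_t bytes_copied = 0;
        try
        {
            flakyReadAt(source, to, n, offset, /*limit=*/ 4, bytes_copied);
        }
        catch (const std::exception & e)
        {
            std::cout << "attempt " << attempt << ": " << e.what() << ", got " << bytes_copied << " bytes\n";
        }
        offset += bytes_copied;
        to += bytes_copied;
        n -= bytes_copied;
    }
    return initial_n;
}

int main()
{
    std::string source = "0123456789abcdef";
    std::string out(10, '\0');
    readBigAt(source, out.data(), out.size(), 3);
    std::cout << out << '\n';   // prints "3456789abc"
}
```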
chassert(file_info && file_info->seekable); - if (n == 0) - return 0; - Poco::URI uri_ = saved_uri_redirect.value_or(uri); if (uri_.getPath().empty()) uri_.setPath("/"); + size_t initial_n = n; size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms; - for (size_t attempt = 0;; ++attempt) + for (size_t attempt = 0; n > 0; ++attempt) { bool last_attempt = attempt + 1 >= settings.http_max_tries; @@ -609,6 +614,7 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si Poco::Net::HTTPResponse response; std::istream * result_istr; + size_t bytes_copied = 0; try { @@ -622,17 +628,14 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si "Expected 206 Partial Content, got {} when reading {} range [{}, {})", toString(response.getStatus()), uri_.toString(), offset, offset + n); - bool cancelled; - size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); - - if (!cancelled) + copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &bytes_copied); + if (bytes_copied == n) { + result_istr->ignore(UINT64_MAX); /// Response was fully read. - markSessionForReuse(sess); + markSessionForReuse(*sess); ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); } - - return r; } catch (const Poco::Exception & e) { @@ -657,9 +660,15 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si sleepForMilliseconds(milliseconds_to_wait); milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms); - continue; } + + /// Make sure retries don't re-read the bytes that we've already reported to progress_callback. + offset += bytes_copied; + to += bytes_copied; + n -= bytes_copied; } + + return initial_n; } template @@ -779,9 +788,21 @@ template const std::string & ReadWriteBufferFromHTTPBase::getCompressionMethod() const { return content_encoding; } template -std::optional ReadWriteBufferFromHTTPBase::getLastModificationTime() +std::optional ReadWriteBufferFromHTTPBase::tryGetLastModificationTime() { - return getFileInfo().last_modified; + if (!file_info) + { + try + { + file_info = getFileInfo(); + } + catch (...) 
+ { + return std::nullopt; + } + } + + return file_info->last_modified; } template @@ -843,12 +864,12 @@ HTTPFileInfo ReadWriteBufferFromHTTPBase::parseFileInfo(con } -SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_) - : timeouts(timeouts_) {} +SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_, Poco::Net::HTTPClientSession::ProxyConfig proxy_config_) + : timeouts(timeouts_), proxy_config(proxy_config_) {} SessionFactory::SessionType SessionFactory::buildNewSession(const Poco::URI & uri) { - return makeHTTPSession(uri, timeouts); + return makeHTTPSession(uri, timeouts, proxy_config); } ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( @@ -865,9 +886,10 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( bool delay_initialization_, bool use_external_buffer_, bool skip_not_found_url_, - std::optional file_info_) + std::optional file_info_, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config_) : Parent( - std::make_shared(uri_, max_redirects, std::make_shared(timeouts)), + std::make_shared(uri_, max_redirects, std::make_shared(timeouts, proxy_config_)), uri_, credentials_, method_, @@ -879,7 +901,8 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( delay_initialization_, use_external_buffer_, skip_not_found_url_, - file_info_) {} + file_info_, + proxy_config_) {} PooledSessionFactory::PooledSessionFactory( diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 2d2ae5fe724..e67572b2714 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -111,6 +111,8 @@ namespace detail ReadSettings settings; Poco::Logger * log; + Poco::Net::HTTPClientSession::ProxyConfig proxy_config; + bool withPartialContent(const HTTPRange & range) const; size_t getOffset() const; @@ -161,7 +163,8 @@ namespace detail bool delay_initialization = false, bool use_external_buffer_ = false, bool http_skip_not_found_url_ = false, - std::optional file_info_ = std::nullopt); + std::optional file_info_ = std::nullopt, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config_ = {}); void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false); @@ -201,7 +204,7 @@ namespace detail const std::string & getCompressionMethod() const; - std::optional getLastModificationTime(); + std::optional tryGetLastModificationTime(); HTTPFileInfo getFileInfo(); @@ -212,13 +215,14 @@ namespace detail class SessionFactory { public: - explicit SessionFactory(const ConnectionTimeouts & timeouts_); + explicit SessionFactory(const ConnectionTimeouts & timeouts_, Poco::Net::HTTPClientSession::ProxyConfig proxy_config_ = {}); using SessionType = HTTPSessionPtr; SessionType buildNewSession(const Poco::URI & uri); private: ConnectionTimeouts timeouts; + Poco::Net::HTTPClientSession::ProxyConfig proxy_config; }; class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> @@ -241,7 +245,8 @@ public: bool delay_initialization_ = true, bool use_external_buffer_ = false, bool skip_not_found_url_ = false, - std::optional file_info_ = std::nullopt); + std::optional file_info_ = std::nullopt, + Poco::Net::HTTPClientSession::ProxyConfig proxy_config_ = {}); }; class PooledSessionFactory diff --git a/src/IO/Resource/ClassifiersConfig.cpp b/src/IO/Resource/ClassifiersConfig.cpp index fcd4655e2e4..7dc4d517138 100644 --- a/src/IO/Resource/ClassifiersConfig.cpp +++ b/src/IO/Resource/ClassifiersConfig.cpp @@ -21,7 +21,7 @@ 
ClassifierDescription::ClassifierDescription(const Poco::Util::AbstractConfigura ClassifiersConfig::ClassifiersConfig(const Poco::Util::AbstractConfiguration & config) { Poco::Util::AbstractConfiguration::Keys keys; - const String config_prefix = "classifiers"; + const String config_prefix = "workload_classifiers"; config.keys(config_prefix, keys); for (const auto & key : keys) classifiers.emplace(std::piecewise_construct, @@ -34,7 +34,7 @@ const ClassifierDescription & ClassifiersConfig::get(const String & classifier_n if (auto it = classifiers.find(classifier_name); it != classifiers.end()) return it->second; else - throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unknown classifier '{}' to access resources", classifier_name); + throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unknown workload classifier '{}' to access resources", classifier_name); } } diff --git a/src/IO/Resource/ClassifiersConfig.h b/src/IO/Resource/ClassifiersConfig.h index 96e2bd0f0b9..186c49943ad 100644 --- a/src/IO/Resource/ClassifiersConfig.h +++ b/src/IO/Resource/ClassifiersConfig.h @@ -15,14 +15,14 @@ struct ClassifierDescription : std::unordered_map /* * Loads a config with the following format: - * + * * * /path/to/queue * /path/to/another/queue * * ... * ... - * + * */ class ClassifiersConfig { diff --git a/src/IO/Resource/DynamicResourceManager.cpp b/src/IO/Resource/DynamicResourceManager.cpp index df0de6575f4..b9803d8079d 100644 --- a/src/IO/Resource/DynamicResourceManager.cpp +++ b/src/IO/Resource/DynamicResourceManager.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -217,13 +218,36 @@ void DynamicResourceManager::updateConfiguration(const Poco::Util::AbstractConfi ClassifierPtr DynamicResourceManager::acquire(const String & classifier_name) { // Acquire a reference to the current state - StatePtr state_; + StatePtr state_ref; { std::lock_guard lock{mutex}; - state_ = state; + state_ref = state; } - return std::make_shared(state_, classifier_name); + return std::make_shared(state_ref, classifier_name); +} + +void DynamicResourceManager::forEachNode(IResourceManager::VisitorFunc visitor) +{ + // Acquire a reference to the current state + StatePtr state_ref; + { + std::lock_guard lock{mutex}; + state_ref = state; + } + + std::promise promise; + auto future = promise.get_future(); + scheduler.event_queue->enqueue([state_ref, visitor, &promise] + { + for (auto & [name, resource] : state_ref->resources) + for (auto & [path, node] : resource->nodes) + visitor(name, path, node.type, node.ptr); + promise.set_value(); + }); + + // Block until execution is done in the scheduler thread + future.get(); } void registerDynamicResourceManager(ResourceManagerFactory & factory) diff --git a/src/IO/Resource/DynamicResourceManager.h b/src/IO/Resource/DynamicResourceManager.h index aa1147f1fb2..3372d40a285 100644 --- a/src/IO/Resource/DynamicResourceManager.h +++ b/src/IO/Resource/DynamicResourceManager.h @@ -19,7 +19,7 @@ namespace DB * `ClassifierPtr` is acquired and held. * * Manager can update configuration after initialization. During update, new version of resources are also - * attached to scheduler, so multiple version can coexist for a short perid. This will violate constraints + * attached to scheduler, so multiple version can coexist for a short period. This will violate constraints * (e.g. in-fly-limit), because different version have independent nodes to impose constraints, the same * violation will apply to fairness. 
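The new `forEachNode()` does not touch scheduler state directly: it posts a visitor onto the scheduler's event queue and blocks on a promise/future until the scheduler thread has executed it. A self-contained sketch of that handshake, with a generic worker-thread queue standing in for the scheduler (`EventQueue` and the string node list are illustrative, not the real types):

```
#include <condition_variable>
#include <deque>
#include <functional>
#include <future>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

/// A tiny single-threaded "event queue": all work runs on one worker thread,
/// so tasks can inspect shared state without extra locking.
class EventQueue
{
public:
    EventQueue() : worker([this] { run(); }) {}

    ~EventQueue()
    {
        { std::lock_guard lock(mutex); stop = true; }
        cv.notify_one();
        worker.join();
    }

    void enqueue(std::function<void()> task)
    {
        { std::lock_guard lock(mutex); tasks.push_back(std::move(task)); }
        cv.notify_one();
    }

private:
    void run()
    {
        while (true)
        {
            std::function<void()> task;
            {
                std::unique_lock lock(mutex);
                cv.wait(lock, [this] { return stop || !tasks.empty(); });
                if (stop && tasks.empty())
                    return;
                task = std::move(tasks.front());
                tasks.pop_front();
            }
            task();
        }
    }

    std::mutex mutex;
    std::condition_variable cv;
    std::deque<std::function<void()>> tasks;
    bool stop = false;
    std::thread worker;
};

/// Runs `visitor` for every node on the worker thread and blocks the caller
/// until it is done, mirroring the promise/future handshake in forEachNode().
void forEachNode(EventQueue & queue, const std::vector<std::string> & nodes,
                 const std::function<void(const std::string &)> & visitor)
{
    std::promise<void> promise;
    auto future = promise.get_future();
    queue.enqueue([&nodes, visitor, &promise]
    {
        for (const auto & node : nodes)
            visitor(node);
        promise.set_value();
    });
    future.get(); /// block until the worker thread has finished the traversal
}
```

Blocking on the future keeps the traversal consistent: every mutation of the node set also happens on the same worker thread, so the visitor sees a stable snapshot.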
Old version exists as long as there is at least one classifier * instance referencing it. Classifiers are typically attached to queries and will be destructed with them. @@ -30,6 +30,7 @@ public: DynamicResourceManager(); void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override; ClassifierPtr acquire(const String & classifier_name) override; + void forEachNode(VisitorFunc visitor) override; private: /// Holds everything required to work with one specific configuration diff --git a/src/IO/Resource/FairPolicy.h b/src/IO/Resource/FairPolicy.h index 9c0c78f057c..57b26344658 100644 --- a/src/IO/Resource/FairPolicy.h +++ b/src/IO/Resource/FairPolicy.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -50,6 +51,8 @@ public: bool equals(ISchedulerNode * other) override { + if (!ISchedulerNode::equals(other)) + return false; if (auto * o = dynamic_cast(other)) return true; return false; @@ -176,8 +179,11 @@ public: max_vruntime = 0; } system_vruntime = max_vruntime; + busy_periods++; } + dequeued_requests++; + dequeued_cost += request->cost; return {request, heap_size > 0}; } @@ -186,12 +192,33 @@ public: return heap_size > 0; } + size_t activeChildren() override + { + return heap_size; + } + void activateChild(ISchedulerNode * child) override { // Find this child; this is O(1), thanks to inactive index we hold in `parent.idx` activateChildImpl(child->info.parent.idx); } + // For introspection + double getSystemVRuntime() const + { + return system_vruntime; + } + + std::optional getChildVRuntime(ISchedulerNode * child) const + { + for (const auto & item : items) + { + if (child == item.child) + return item.vruntime; + } + return std::nullopt; + } + private: void activateChildImpl(size_t inactive_idx) { diff --git a/src/IO/Resource/FifoQueue.h b/src/IO/Resource/FifoQueue.h index f3ff15ad461..e95e817719a 100644 --- a/src/IO/Resource/FifoQueue.h +++ b/src/IO/Resource/FifoQueue.h @@ -30,6 +30,8 @@ public: bool equals(ISchedulerNode * other) override { + if (!ISchedulerNode::equals(other)) + return false; if (auto * o = dynamic_cast(other)) return true; return false; @@ -39,6 +41,7 @@ public: { std::unique_lock lock(mutex); request->enqueue_ns = clock_gettime_ns(); + queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); if (was_empty) @@ -52,6 +55,11 @@ public: return {nullptr, false}; ResourceRequest * result = requests.front(); requests.pop_front(); + if (requests.empty()) + busy_periods++; + queue_cost -= result->cost; + dequeued_requests++; + dequeued_cost += result->cost; return {result, !requests.empty()}; } @@ -61,6 +69,11 @@ public: return !requests.empty(); } + size_t activeChildren() override + { + return 0; + } + void activateChild(ISchedulerNode *) override { assert(false); // queue cannot have children @@ -83,8 +96,15 @@ public: return nullptr; } + std::pair getQueueLengthAndCost() + { + std::unique_lock lock(mutex); + return {requests.size(), queue_cost}; + } + private: std::mutex mutex; + Int64 queue_cost = 0; std::deque requests; }; diff --git a/src/IO/Resource/PriorityPolicy.h b/src/IO/Resource/PriorityPolicy.h index 3c091dcc85a..86d4fadb9dc 100644 --- a/src/IO/Resource/PriorityPolicy.h +++ b/src/IO/Resource/PriorityPolicy.h @@ -42,6 +42,8 @@ public: bool equals(ISchedulerNode * other) override { + if (!ISchedulerNode::equals(other)) + return false; if (auto * o = dynamic_cast(other)) return true; return false; @@ -113,8 +115,12 @@ public: { std::pop_heap(items.begin(), items.end()); items.pop_back(); + if 
(items.empty()) + busy_periods++; } + dequeued_requests++; + dequeued_cost += request->cost; return {request, !items.empty()}; } @@ -123,6 +129,11 @@ public: return !items.empty(); } + size_t activeChildren() override + { + return items.size(); + } + void activateChild(ISchedulerNode * child) override { bool activate_parent = items.empty(); diff --git a/src/IO/Resource/SemaphoreConstraint.h b/src/IO/Resource/SemaphoreConstraint.h index 237e63eaddb..9c6ce43d6ea 100644 --- a/src/IO/Resource/SemaphoreConstraint.h +++ b/src/IO/Resource/SemaphoreConstraint.h @@ -27,6 +27,8 @@ public: bool equals(ISchedulerNode * other) override { + if (!ISchedulerNode::equals(other)) + return false; if (auto * o = dynamic_cast(other)) return max_requests == o->max_requests && max_cost == o->max_cost; return false; @@ -78,7 +80,10 @@ public: requests++; cost += request->cost; child_active = child_now_active; - + if (!active()) + busy_periods++; + dequeued_requests++; + dequeued_cost += request->cost; return {request, active()}; } @@ -113,6 +118,30 @@ public: return active(); } + size_t activeChildren() override + { + std::unique_lock lock(mutex); + return child_active; + } + + bool isSatisfied() override + { + std::unique_lock lock(mutex); + return satisfied(); + } + + std::pair getInflights() + { + std::unique_lock lock(mutex); + return {requests, cost}; + } + + std::pair getLimits() + { + std::unique_lock lock(mutex); + return {max_requests, max_cost}; + } + private: bool satisfied() const { diff --git a/src/IO/Resource/StaticResourceManager.h b/src/IO/Resource/StaticResourceManager.h index 066dbf4ebf8..5ec6a35750b 100644 --- a/src/IO/Resource/StaticResourceManager.h +++ b/src/IO/Resource/StaticResourceManager.h @@ -22,6 +22,11 @@ public: ClassifierPtr acquire(const String & classifier_name) override; + void forEachNode(VisitorFunc visitor) override + { + UNUSED(visitor); + } + private: struct Resource { diff --git a/src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp b/src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp index 43773559f03..949a1ee0264 100644 --- a/src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp +++ b/src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp @@ -24,10 +24,10 @@ TEST(IOResourceDynamicResourceManager, Smoke) fifo3 - + /fair/A /fair/B - +
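The counters added to `FifoQueue` (and the other scheduler nodes) track queued cost, dequeued requests and cost, and busy periods for introspection. A simplified, self-contained mirror of that accounting, where `Request` is a stand-in for `ResourceRequest`:

```
#include <cstddef>
#include <cstdint>
#include <deque>
#include <mutex>
#include <optional>
#include <utility>

struct Request { std::int64_t cost = 1; };

/// FIFO request queue with the introspection counters from the patch: total cost
/// currently queued, number/cost of dequeued requests, and how many busy periods
/// (non-empty -> empty transitions) have happened.
class FifoQueue
{
public:
    void enqueue(Request request)
    {
        std::lock_guard lock(mutex);
        queue_cost += request.cost;
        requests.push_back(request);
    }

    std::optional<Request> dequeue()
    {
        std::lock_guard lock(mutex);
        if (requests.empty())
            return std::nullopt;
        Request result = requests.front();
        requests.pop_front();
        if (requests.empty())
            ++busy_periods;          /// the queue just became idle
        queue_cost -= result.cost;
        ++dequeued_requests;
        dequeued_cost += result.cost;
        return result;
    }

    std::pair<std::size_t, std::int64_t> getQueueLengthAndCost()
    {
        std::lock_guard lock(mutex);
        return {requests.size(), queue_cost};
    }

private:
    std::mutex mutex;
    std::deque<Request> requests;
    std::int64_t queue_cost = 0;
    std::uint64_t busy_periods = 0;
    std::uint64_t dequeued_requests = 0;
    std::int64_t dequeued_cost = 0;
};
```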
)CONFIG"); @@ -71,11 +71,11 @@ TEST(IOResourceDynamicResourceManager, Fairness) fifo - + /fair/A /fair/B /fair/leader - + )CONFIG"); diff --git a/src/IO/Resource/tests/gtest_resource_manager_static.cpp b/src/IO/Resource/tests/gtest_resource_manager_static.cpp index 976eac41a49..9c5e86e9ffc 100644 --- a/src/IO/Resource/tests/gtest_resource_manager_static.cpp +++ b/src/IO/Resource/tests/gtest_resource_manager_static.cpp @@ -24,10 +24,10 @@ TEST(IOResourceStaticResourceManager, Smoke) 1 - + /prio/A /prio/B - + )CONFIG"); @@ -70,13 +70,13 @@ TEST(IOResourceStaticResourceManager, Prioritization) - + /prio/A /prio/B /prio/C /prio/D /prio/leader - + )CONFIG"); diff --git a/src/IO/ResourceBudget.h b/src/IO/ResourceBudget.h index 7f67f9cfc10..0adad45ba91 100644 --- a/src/IO/ResourceBudget.h +++ b/src/IO/ResourceBudget.h @@ -48,6 +48,11 @@ public: available.fetch_add(estimated_cost - real_cost); } + ResourceCost get() const + { + return available.load(); + } + private: std::atomic available = 0; // requested - consumed }; diff --git a/src/IO/ResourceRequest.h b/src/IO/ResourceRequest.h index 989349148cf..3d2230746f9 100644 --- a/src/IO/ResourceRequest.h +++ b/src/IO/ResourceRequest.h @@ -45,8 +45,7 @@ class ResourceRequest { public: /// Cost of request execution; should be filled before request enqueueing. - /// NOTE: If cost is not known in advance, credit model can be used: - /// NOTE: for the first request use 1 and + /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; /// Request outcome diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 7e20b1a9e8f..4b6968f363a 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -13,15 +13,29 @@ #include #include +#include + #include #include #include #include #include +#include #include #include +#include + + +namespace ProfileEvents +{ + extern const Event S3WriteRequestsErrors; + extern const Event S3ReadRequestsErrors; + + extern const Event DiskS3WriteRequestsErrors; + extern const Event DiskS3ReadRequestsErrors; +} namespace DB { @@ -177,7 +191,7 @@ Client::Client( } } - LOG_TRACE(log, "API mode: {}", toString(api_mode)); + LOG_TRACE(log, "API mode of the S3 client: {}", api_mode); detect_region = provider_type == ProviderType::AWS && explicit_region == Aws::Region::AWS_GLOBAL; @@ -346,12 +360,14 @@ Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) c Model::ListObjectsV2Outcome Client::ListObjectsV2(const ListObjectsV2Request & request) const { - return doRequest(request, [this](const Model::ListObjectsV2Request & req) { return ListObjectsV2(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::ListObjectsV2Request & req) { return ListObjectsV2(req); }); } Model::ListObjectsOutcome Client::ListObjects(const ListObjectsRequest & request) const { - return doRequest(request, [this](const Model::ListObjectsRequest & req) { return ListObjects(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::ListObjectsRequest & req) { return ListObjects(req); }); } Model::GetObjectOutcome Client::GetObject(const GetObjectRequest & request) const @@ -361,19 +377,19 @@ Model::GetObjectOutcome Client::GetObject(const GetObjectRequest & request) cons Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(const AbortMultipartUploadRequest & request) const { - return doRequest( + return doRequestWithRetryNetworkErrors( request, [this](const Model::AbortMultipartUploadRequest & 
req) { return AbortMultipartUpload(req); }); } Model::CreateMultipartUploadOutcome Client::CreateMultipartUpload(const CreateMultipartUploadRequest & request) const { - return doRequest( + return doRequestWithRetryNetworkErrors( request, [this](const Model::CreateMultipartUploadRequest & req) { return CreateMultipartUpload(req); }); } Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const CompleteMultipartUploadRequest & request) const { - auto outcome = doRequest( + auto outcome = doRequestWithRetryNetworkErrors( request, [this](const Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); }); if (!outcome.IsSuccess() || provider_type != ProviderType::GCS) @@ -403,32 +419,38 @@ Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const Comp Model::CopyObjectOutcome Client::CopyObject(const CopyObjectRequest & request) const { - return doRequest(request, [this](const Model::CopyObjectRequest & req) { return CopyObject(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::CopyObjectRequest & req) { return CopyObject(req); }); } Model::PutObjectOutcome Client::PutObject(const PutObjectRequest & request) const { - return doRequest(request, [this](const Model::PutObjectRequest & req) { return PutObject(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::PutObjectRequest & req) { return PutObject(req); }); } Model::UploadPartOutcome Client::UploadPart(const UploadPartRequest & request) const { - return doRequest(request, [this](const Model::UploadPartRequest & req) { return UploadPart(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::UploadPartRequest & req) { return UploadPart(req); }); } Model::UploadPartCopyOutcome Client::UploadPartCopy(const UploadPartCopyRequest & request) const { - return doRequest(request, [this](const Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); }); } Model::DeleteObjectOutcome Client::DeleteObject(const DeleteObjectRequest & request) const { - return doRequest(request, [this](const Model::DeleteObjectRequest & req) { return DeleteObject(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::DeleteObjectRequest & req) { return DeleteObject(req); }); } Model::DeleteObjectsOutcome Client::DeleteObjects(const DeleteObjectsRequest & request) const { - return doRequest(request, [this](const Model::DeleteObjectsRequest & req) { return DeleteObjects(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::DeleteObjectsRequest & req) { return DeleteObjects(req); }); } Client::ComposeObjectOutcome Client::ComposeObject(const ComposeObjectRequest & request) const @@ -457,7 +479,8 @@ Client::ComposeObjectOutcome Client::ComposeObject(const ComposeObjectRequest & return ComposeObjectOutcome(MakeRequest(req, endpointResolutionOutcome.GetResult(), Aws::Http::HttpMethod::HTTP_PUT)); }; - return doRequest(request, request_fn); + return doRequestWithRetryNetworkErrors( + request, request_fn); } template @@ -538,6 +561,65 @@ Client::doRequest(const RequestType & request, RequestFn request_fn) const throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects"); } +template +std::invoke_result_t +Client::doRequestWithRetryNetworkErrors(const RequestType & request, RequestFn request_fn) const +{ + auto with_retries = 
[this, request_fn_ = std::move(request_fn)] (const RequestType & request_) + { + chassert(client_configuration.retryStrategy); + const Int64 max_attempts = client_configuration.retryStrategy->GetMaxAttempts(); + std::exception_ptr last_exception = nullptr; + for (Int64 attempt_no = 0; attempt_no < max_attempts; ++attempt_no) + { + try + { + /// S3 does retries network errors actually. + /// But it is matter when errors occur. + /// This code retries a specific case when + /// network error happens when XML document is being read from the response body. + /// Hence, the response body is a stream, network errors are possible at reading. + /// S3 doesn't retry them. + + /// Not all requests can be retried in that way. + /// Requests that read out response body to build the result are possible to retry. + /// Requests that expose the response stream as an answer are not retried with that code. E.g. GetObject. + return request_fn_(request_); + } + catch (Poco::Net::ConnectionResetException &) + { + + if constexpr (IsReadMethod) + { + if (client_configuration.for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3ReadRequestsErrors); + else + ProfileEvents::increment(ProfileEvents::S3ReadRequestsErrors); + } + else + { + if (client_configuration.for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3WriteRequestsErrors); + else + ProfileEvents::increment(ProfileEvents::S3WriteRequestsErrors); + } + + tryLogCurrentException(log, "Will retry"); + last_exception = std::current_exception(); + + auto error = Aws::Client::AWSError(Aws::Client::CoreErrors::NETWORK_CONNECTION, /*retry*/ true); + client_configuration.retryStrategy->CalculateDelayBeforeNextRetry(error, attempt_no); + continue; + } + } + + chassert(last_exception); + std::rethrow_exception(last_exception); + }; + + return doRequest(request, with_retries); +} + bool Client::supportsMultiPartCopy() const { return provider_type != ProviderType::GCS; @@ -782,16 +864,30 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT bool enable_s3_requests_logging, bool for_disk_s3, const ThrottlerPtr & get_request_throttler, - const ThrottlerPtr & put_request_throttler) + const ThrottlerPtr & put_request_throttler, + const String & protocol) { - return PocoHTTPClientConfiguration( + auto context = Context::getGlobalContextInstance(); + chassert(context); + auto proxy_configuration_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); + + auto per_request_configuration = [=] () { return proxy_configuration_resolver->resolve(); }; + auto error_report = [=] (const DB::ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; + + auto config = PocoHTTPClientConfiguration( + per_request_configuration, force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging, for_disk_s3, get_request_throttler, - put_request_throttler); + put_request_throttler, + error_report); + + config.scheme = Aws::Http::SchemeMapper::FromString(protocol.c_str()); + + return config; } } diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 8904c850553..a2be7ff5566 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -250,6 +250,10 @@ private: std::invoke_result_t doRequest(const RequestType & request, RequestFn request_fn) const; + template + std::invoke_result_t + doRequestWithRetryNetworkErrors(const RequestType & request, RequestFn request_fn) const; + void updateURIForBucket(const std::string & bucket, S3::URI 
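`doRequestWithRetryNetworkErrors()` retries only the case where the connection is reset while the response body is being read, with a bounded number of attempts and a backoff taken from the SDK retry strategy. A hedged sketch of the same shape in plain C++ (`ConnectionResetError` stands in for `Poco::Net::ConnectionResetException`, and a fixed doubling backoff replaces the AWS retry strategy's delay calculation):

```
#include <chrono>
#include <exception>
#include <functional>
#include <stdexcept>
#include <thread>

/// Hypothetical error type standing in for Poco::Net::ConnectionResetException.
struct ConnectionResetError : std::runtime_error
{
    using std::runtime_error::runtime_error;
};

/// Calls request_fn and retries it only when the connection is reset, rethrowing
/// the last error once the attempt budget is exhausted. Other exceptions propagate
/// immediately, just like in the lambda above.
template <typename Result>
Result withNetworkRetries(const std::function<Result()> & request_fn,
                          int max_attempts,
                          std::chrono::milliseconds backoff)
{
    if (max_attempts <= 0)
        throw std::invalid_argument("max_attempts must be positive");

    std::exception_ptr last_exception;
    for (int attempt = 0; attempt < max_attempts; ++attempt)
    {
        try
        {
            return request_fn();
        }
        catch (const ConnectionResetError &)
        {
            last_exception = std::current_exception();
            /// The real code also bumps the S3ReadRequestsErrors / S3WriteRequestsErrors
            /// profile counters here depending on the request kind.
            std::this_thread::sleep_for(backoff);
            backoff *= 2;
        }
    }
    std::rethrow_exception(last_exception);
}
```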
new_uri) const; std::optional getURIFromError(const Aws::S3::S3Error & error) const; std::optional updateURIForBucketForHead(const std::string & bucket) const; @@ -310,7 +314,8 @@ public: bool enable_s3_requests_logging, bool for_disk_s3, const ThrottlerPtr & get_request_throttler, - const ThrottlerPtr & put_request_throttler); + const ThrottlerPtr & put_request_throttler, + const String & protocol = "https"); private: ClientFactory(); diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index 84550d3f7c5..6ffd6fde43e 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -11,29 +11,32 @@ # include # include +# include +# include + # include # include -# include # include -# include # include +# include namespace DB { + namespace ErrorCodes { extern const int AWS_ERROR; } -} -namespace DB::S3 +namespace S3 { namespace { + bool areCredentialsEmptyOrExpired(const Aws::Auth::AWSCredentials & credentials, uint64_t expiration_window_seconds) { if (credentials.IsEmpty()) @@ -43,6 +46,8 @@ bool areCredentialsEmptyOrExpired(const Aws::Auth::AWSCredentials & credentials, return now >= credentials.GetExpiration() - std::chrono::seconds(expiration_window_seconds); } +const char SSO_CREDENTIALS_PROVIDER_LOG_TAG[] = "SSOCredentialsProvider"; + } AWSEC2MetadataClient::AWSEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration, const char * endpoint_) @@ -449,6 +454,139 @@ void AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider::refreshIfExpired() Reload(); } + +SSOCredentialsProvider::SSOCredentialsProvider(DB::S3::PocoHTTPClientConfiguration aws_client_configuration_, uint64_t expiration_window_seconds_) + : profile_to_use(Aws::Auth::GetConfigProfileName()) + , aws_client_configuration(std::move(aws_client_configuration_)) + , expiration_window_seconds(expiration_window_seconds_) + , logger(&Poco::Logger::get(SSO_CREDENTIALS_PROVIDER_LOG_TAG)) +{ + LOG_INFO(logger, "Setting sso credentials provider to read config from {}", profile_to_use); +} + +Aws::Auth::AWSCredentials SSOCredentialsProvider::GetAWSCredentials() +{ + refreshIfExpired(); + Aws::Utils::Threading::ReaderLockGuard guard(m_reloadLock); + return credentials; +} + +void SSOCredentialsProvider::Reload() +{ + auto profile = Aws::Config::GetCachedConfigProfile(profile_to_use); + const auto access_token = [&] + { + // If we have an SSO Session set, use the refreshed token. 
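`areCredentialsEmptyOrExpired()` deliberately treats credentials as expired a configurable window before their real expiration, so they are refreshed ahead of the deadline rather than right at it. The same check in isolation (the `Credentials` struct and its field are illustrative):

```
#include <chrono>
#include <optional>

struct Credentials
{
    std::optional<std::chrono::system_clock::time_point> expiration; /// empty => no credentials at all
};

/// Credentials count as expired `expiration_window` before their real expiration,
/// so callers refresh them early instead of racing the deadline.
bool emptyOrExpired(const Credentials & credentials, std::chrono::seconds expiration_window)
{
    if (!credentials.expiration)
        return true;
    return std::chrono::system_clock::now() >= *credentials.expiration - expiration_window;
}
```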
+ if (profile.IsSsoSessionSet()) + { + sso_region = profile.GetSsoSession().GetSsoRegion(); + auto token = bearer_token_provider.GetAWSBearerToken(); + expires_at = token.GetExpiration(); + return token.GetToken(); + } + + Aws::String hashed_start_url = Aws::Utils::HashingUtils::HexEncode(Aws::Utils::HashingUtils::CalculateSHA1(profile.GetSsoStartUrl())); + auto profile_directory = Aws::Auth::ProfileConfigFileAWSCredentialsProvider::GetProfileDirectory(); + Aws::StringStream ss_token; + ss_token << profile_directory; + ss_token << Aws::FileSystem::PATH_DELIM << "sso" << Aws::FileSystem::PATH_DELIM << "cache" << Aws::FileSystem::PATH_DELIM << hashed_start_url << ".json"; + auto sso_token_path = ss_token.str(); + LOG_INFO(logger, "Loading token from: {}", sso_token_path); + sso_region = profile.GetSsoRegion(); + return loadAccessTokenFile(sso_token_path); + }(); + + if (access_token.empty()) + { + LOG_TRACE(logger, "Access token for SSO not available"); + return; + } + if (expires_at < Aws::Utils::DateTime::Now()) + { + LOG_TRACE(logger, "Cached Token expired at {}", expires_at.ToGmtString(Aws::Utils::DateFormat::ISO_8601)); + return; + } + + Aws::Internal::SSOCredentialsClient::SSOGetRoleCredentialsRequest request; + request.m_ssoAccountId = profile.GetSsoAccountId(); + request.m_ssoRoleName = profile.GetSsoRoleName(); + request.m_accessToken = access_token; + + aws_client_configuration.scheme = Aws::Http::Scheme::HTTPS; + aws_client_configuration.region = sso_region; + LOG_TRACE(logger, "Passing config to client for region: {}", sso_region); + + Aws::Vector retryable_errors; + retryable_errors.push_back("TooManyRequestsException"); + + aws_client_configuration.retryStrategy = Aws::MakeShared(SSO_CREDENTIALS_PROVIDER_LOG_TAG, retryable_errors, /*maxRetries=*/3); + client = Aws::MakeUnique(SSO_CREDENTIALS_PROVIDER_LOG_TAG, aws_client_configuration); + + LOG_TRACE(logger, "Requesting credentials with AWS_ACCESS_KEY: {}", sso_account_id); + auto result = client->GetSSOCredentials(request); + LOG_TRACE(logger, "Successfully retrieved credentials with AWS_ACCESS_KEY: {}", result.creds.GetAWSAccessKeyId()); + + credentials = result.creds; +} + +void SSOCredentialsProvider::refreshIfExpired() +{ + Aws::Utils::Threading::ReaderLockGuard guard(m_reloadLock); + if (!areCredentialsEmptyOrExpired(credentials, expiration_window_seconds)) + return; + + guard.UpgradeToWriterLock(); + + if (!areCredentialsEmptyOrExpired(credentials, expiration_window_seconds)) // double-checked lock to avoid refreshing twice + return; + + Reload(); +} + +Aws::String SSOCredentialsProvider::loadAccessTokenFile(const Aws::String & sso_access_token_path) +{ + LOG_TRACE(logger, "Preparing to load token from: {}", sso_access_token_path); + + Aws::IFStream input_file(sso_access_token_path.c_str()); + + if (input_file) + { + LOG_TRACE(logger, "Reading content from token file: {}", sso_access_token_path); + + Aws::Utils::Json::JsonValue token_doc(input_file); + if (!token_doc.WasParseSuccessful()) + { + LOG_TRACE(logger, "Failed to parse token file: {}", sso_access_token_path); + return ""; + } + Aws::Utils::Json::JsonView token_view(token_doc); + Aws::String tmp_access_token, expiration_str; + tmp_access_token = token_view.GetString("accessToken"); + expiration_str = token_view.GetString("expiresAt"); + Aws::Utils::DateTime expiration(expiration_str, Aws::Utils::DateFormat::ISO_8601); + + LOG_TRACE(logger, "Token cache file contains accessToken [{}], expiration [{}]", tmp_access_token, expiration_str); + + if 
(tmp_access_token.empty() || !expiration.WasParseSuccessful()) + { + LOG_TRACE(logger, R"(The SSO session associated with this profile has expired or is otherwise invalid. To refresh this SSO session run aws sso login with the corresponding profile.)"); + LOG_TRACE( + logger, + "Token cache file failed because {}{}", + (tmp_access_token.empty() ? "AccessToken was empty " : ""), + (!expiration.WasParseSuccessful() ? "failed to parse expiration" : "")); + return ""; + } + expires_at = expiration; + return tmp_access_token; + } + else + { + LOG_TRACE(logger, "Unable to open token file on path: {}", sso_access_token_path); + return ""; + } +} + S3CredentialsProviderChain::S3CredentialsProviderChain( const DB::S3::PocoHTTPClientConfiguration & configuration, const Aws::Auth::AWSCredentials & credentials, @@ -494,6 +632,18 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( AddProvider(std::make_shared()); + { + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration( + configuration.region, + configuration.remote_host_filter, + configuration.s3_max_redirects, + configuration.enable_s3_requests_logging, + configuration.for_disk_s3, + configuration.get_request_throttler, + configuration.put_request_throttler); + AddProvider(std::make_shared( + std::move(aws_client_configuration), credentials_configuration.expiration_window_seconds)); + } /// ECS TaskRole Credentials only available when ENVIRONMENT VARIABLE is set. const auto relative_uri = Aws::Environment::GetEnv(AWS_ECS_CONTAINER_CREDENTIALS_RELATIVE_URI); @@ -532,13 +682,13 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( configuration.enable_s3_requests_logging, configuration.for_disk_s3, configuration.get_request_throttler, - configuration.put_request_throttler); + configuration.put_request_throttler, + Aws::Http::SchemeMapper::ToString(Aws::Http::Scheme::HTTP)); /// See MakeDefaultHttpResourceClientConfiguration(). /// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside /// of contrib/aws/aws-cpp-sdk-core/source/internal/AWSHttpResourceClient.cpp aws_client_configuration.maxConnections = 2; - aws_client_configuration.scheme = Aws::Http::Scheme::HTTP; /// Explicitly set the proxy settings to empty/zero to avoid relying on defaults that could potentially change /// in the future. @@ -569,4 +719,6 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( } +} + #endif diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 324b750c683..0243e8e4986 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -8,6 +8,7 @@ # include # include # include +# include # include @@ -124,6 +125,39 @@ private: uint64_t expiration_window_seconds; }; +class SSOCredentialsProvider : public Aws::Auth::AWSCredentialsProvider +{ +public: + SSOCredentialsProvider(DB::S3::PocoHTTPClientConfiguration aws_client_configuration_, uint64_t expiration_window_seconds_); + + Aws::Auth::AWSCredentials GetAWSCredentials() override; + +private: + Aws::UniquePtr client; + Aws::Auth::AWSCredentials credentials; + + // Profile description variables + Aws::String profile_to_use; + + // The AWS account ID that temporary AWS credentials are resolved for. + Aws::String sso_account_id; + // The AWS region where the SSO directory for the given sso_start_url is hosted. + // This is independent of the general region configuration and MUST NOT be conflated. + Aws::String sso_region; + // The expiration time of the accessToken. 
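`refreshIfExpired()` uses the classic double-checked pattern: check under a shared (reader) lock, upgrade to an exclusive lock, check again, and only then reload once. `std::shared_mutex` cannot upgrade in place, so this standalone sketch releases and reacquires instead; `CachedValue`, its integer payload and the 30 s validity are purely illustrative:

```
#include <chrono>
#include <mutex>
#include <shared_mutex>

/// Double-checked refresh: readers normally take only the shared lock; the first
/// thread that sees the value as stale takes the exclusive lock, re-checks and
/// refreshes once.
class CachedValue
{
public:
    int get()
    {
        {
            std::shared_lock lock(mutex);
            if (!isStale())
                return value;
        }
        std::unique_lock lock(mutex);
        if (isStale())          /// somebody else may have refreshed while we waited
            refresh();
        return value;
    }

private:
    bool isStale() const { return std::chrono::steady_clock::now() >= valid_until; }

    void refresh()
    {
        value += 1; /// stand-in for the expensive reload
        valid_until = std::chrono::steady_clock::now() + std::chrono::seconds(30);
    }

    std::shared_mutex mutex;
    int value = 0;
    std::chrono::steady_clock::time_point valid_until{}; /// default-constructed: already stale
};
```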
+ Aws::Utils::DateTime expires_at; + // The SSO Token Provider + Aws::Auth::SSOBearerTokenProvider bearer_token_provider; + + DB::S3::PocoHTTPClientConfiguration aws_client_configuration; + uint64_t expiration_window_seconds; + Poco::Logger * logger; + + void Reload() override; + void refreshIfExpired(); + Aws::String loadAccessTokenFile(const Aws::String & sso_access_token_path); +}; + struct CredentialsConfiguration { bool use_environment_credentials = false; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1a367a8199d..90327d4dc2e 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -85,20 +85,24 @@ namespace DB::S3 { PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_, bool for_disk_s3_, const ThrottlerPtr & get_request_throttler_, - const ThrottlerPtr & put_request_throttler_) - : force_region(force_region_) + const ThrottlerPtr & put_request_throttler_, + std::function error_report_) + : per_request_configuration(per_request_configuration_) + , force_region(force_region_) , remote_host_filter(remote_host_filter_) , s3_max_redirects(s3_max_redirects_) , enable_s3_requests_logging(enable_s3_requests_logging_) , for_disk_s3(for_disk_s3_) , get_request_throttler(get_request_throttler_) , put_request_throttler(put_request_throttler_) + , error_report(error_report_) { } @@ -258,12 +262,12 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT void PocoHTTPClient::makeRequestInternal( Aws::Http::HttpRequest & request, std::shared_ptr & response, - Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. 
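The proxy handling in `makeRequestInternal()` boils down to two decisions: a pooled (already connected) session cannot be used once a per-request proxy is resolved, and an HTTPS endpoint behind a plain-HTTP proxy has to be reached through a CONNECT tunnel. Restated as standalone helpers (type and field names here are illustrative, not the patch's API):

```
#include <cstddef>
#include <string>

enum class Protocol { HTTP, HTTPS };

struct ProxyConfiguration
{
    std::string host;      /// empty means "no proxy for this request"
    unsigned port = 0;
    Protocol protocol = Protocol::HTTP;
};

/// Pooled sessions are already connected, so a per-request proxy can no longer be
/// applied to them; fall back to a fresh session whenever a proxy is resolved.
bool canUsePooledSession(const ProxyConfiguration & proxy, std::size_t pool_size)
{
    return pool_size != 0 && proxy.host.empty();
}

/// An HTTPS target behind a plain-HTTP proxy needs a CONNECT tunnel; an HTTPS
/// proxy can forward the request directly.
bool needsConnectTunnel(const ProxyConfiguration & proxy, Protocol target)
{
    return proxy.protocol == Protocol::HTTP && target == Protocol::HTTPS;
}
```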
- const auto request_configuration = per_request_configuration(request); - if (http_connection_pool_size && request_configuration.proxy_host.empty()) + const auto request_configuration = per_request_configuration(); + if (http_connection_pool_size && request_configuration.host.empty()) makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); else makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); @@ -272,7 +276,7 @@ void PocoHTTPClient::makeRequestInternal( template void PocoHTTPClient::makeRequestInternalImpl( Aws::Http::HttpRequest & request, - const ClientConfigurationPerRequest & request_configuration, + const DB::ProxyConfiguration & request_configuration, std::shared_ptr & response, Aws::Utils::RateLimits::RateLimiterInterface *, Aws::Utils::RateLimits::RateLimiterInterface *) const @@ -327,7 +331,7 @@ void PocoHTTPClient::makeRequestInternalImpl( Poco::URI target_uri(uri); SessionPtr session; - if (!request_configuration.proxy_host.empty()) + if (!request_configuration.host.empty()) { if (enable_s3_requests_logging) LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri); @@ -339,12 +343,12 @@ void PocoHTTPClient::makeRequestInternalImpl( target_uri, timeouts, http_connection_pool_size, wait_on_pool_size_limit); else session = makeHTTPSession(target_uri, timeouts); - bool use_tunnel = request_configuration.proxy_scheme == Aws::Http::Scheme::HTTP && target_uri.getScheme() == "https"; + bool use_tunnel = request_configuration.protocol == DB::ProxyConfiguration::Protocol::HTTP && target_uri.getScheme() == "https"; session->setProxy( - request_configuration.proxy_host, - request_configuration.proxy_port, - Aws::Http::SchemeMapper::ToString(request_configuration.proxy_scheme), + request_configuration.host, + request_configuration.port, + DB::ProxyConfiguration::protocolToString(request_configuration.protocol), use_tunnel ); } @@ -536,7 +540,10 @@ void PocoHTTPClient::makeRequestInternalImpl( } catch (...) 
{ - tryLogCurrentException(log, fmt::format("Failed to make request to: {}", uri)); + auto error_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ true); + error_message.text = fmt::format("Failed to make request to: {}: {}", uri, error_message.text); + LOG_INFO(log, error_message); + response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION); response->SetClientErrorMessage(getCurrentExceptionMessage(false)); diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 92d3d5c5747..d1ce148a707 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -34,16 +35,9 @@ namespace DB::S3 { class ClientFactory; -struct ClientConfigurationPerRequest -{ - Aws::Http::Scheme proxy_scheme = Aws::Http::Scheme::HTTPS; - String proxy_host; - unsigned proxy_port = 0; -}; - struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration { - std::function per_request_configuration = [] (const Aws::Http::HttpRequest &) { return ClientConfigurationPerRequest(); }; + std::function per_request_configuration; String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; @@ -62,17 +56,19 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration void updateSchemeAndRegion(); - std::function error_report; + std::function error_report; private: PocoHTTPClientConfiguration( + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_, bool for_disk_s3_, const ThrottlerPtr & get_request_throttler_, - const ThrottlerPtr & put_request_throttler_ + const ThrottlerPtr & put_request_throttler_, + std::function error_report_ ); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. @@ -165,7 +161,7 @@ private: template void makeRequestInternalImpl( Aws::Http::HttpRequest & request, - const ClientConfigurationPerRequest & per_request_configuration, + const DB::ProxyConfiguration & per_request_configuration, std::shared_ptr & response, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; @@ -174,8 +170,8 @@ protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; - std::function per_request_configuration; - std::function error_report; + std::function per_request_configuration; + std::function error_report; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; diff --git a/src/IO/S3/SessionAwareIOStream.h b/src/IO/S3/SessionAwareIOStream.h index f7e42f99f51..babe52545d1 100644 --- a/src/IO/S3/SessionAwareIOStream.h +++ b/src/IO/S3/SessionAwareIOStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB::S3 diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 9d3496377ce..34590df5397 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -52,9 +52,9 @@ URI::URI(const std::string & uri_) has_version_id = true; } - /// Poco::URI will ignore '?' when parsing the path, but if there is a vestionId in the http parameter, + /// Poco::URI will ignore '?' when parsing the path, but if there is a versionId in the http parameter, /// '?' 
can not be used as a wildcard, otherwise it will be ambiguous. - /// If no "vertionId" in the http parameter, '?' can be used as a wildcard. + /// If no "versionId" in the http parameter, '?' can be used as a wildcard. /// It is necessary to encode '?' to avoid deletion during parsing path. if (!has_version_id && uri_.find('?') != String::npos) { diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 2de2ccd0f9f..002b8dde566 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -53,6 +53,7 @@ namespace public: UploadHelper( const std::shared_ptr & client_ptr_, + const std::shared_ptr & client_with_long_timeout_ptr_, const String & dest_bucket_, const String & dest_key_, const S3Settings::RequestSettings & request_settings_, @@ -61,6 +62,7 @@ namespace bool for_disk_s3_, const Poco::Logger * log_) : client_ptr(client_ptr_) + , client_with_long_timeout_ptr(client_with_long_timeout_ptr_) , dest_bucket(dest_bucket_) , dest_key(dest_key_) , request_settings(request_settings_) @@ -76,6 +78,7 @@ namespace protected: std::shared_ptr client_ptr; + std::shared_ptr client_with_long_timeout_ptr; const String & dest_bucket; const String & dest_key; const S3Settings::RequestSettings & request_settings; @@ -176,7 +179,7 @@ namespace if (for_disk_s3) ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload); - auto outcome = client_ptr->CompleteMultipartUpload(request); + auto outcome = client_with_long_timeout_ptr->CompleteMultipartUpload(request); if (outcome.IsSuccess()) { @@ -430,13 +433,14 @@ namespace size_t offset_, size_t size_, const std::shared_ptr & client_ptr_, + const std::shared_ptr & client_with_long_timeout_ptr_, const String & dest_bucket_, const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, bool for_disk_s3_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, &Poco::Logger::get("copyDataToS3File")) + : UploadHelper(client_ptr_, client_with_long_timeout_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, &Poco::Logger::get("copyDataToS3File")) , create_read_buffer(create_read_buffer_) , offset(offset_) , size(size_) @@ -598,6 +602,7 @@ namespace public: CopyFileHelper( const std::shared_ptr & client_ptr_, + const std::shared_ptr & client_with_long_timeout_ptr_, const String & src_bucket_, const String & src_key_, size_t src_offset_, @@ -608,7 +613,7 @@ namespace const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, bool for_disk_s3_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, &Poco::Logger::get("copyS3File")) + : UploadHelper(client_ptr_, client_with_long_timeout_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, &Poco::Logger::get("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) , offset(src_offset_) @@ -669,7 +674,7 @@ namespace /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 request.SetContentType("binary/octet-stream"); - client_ptr->setKMSHeaders(request); + client_with_long_timeout_ptr->setKMSHeaders(request); } void processCopyRequest(const S3::CopyObjectRequest & request) @@ -681,7 +686,7 @@ namespace if (for_disk_s3) ProfileEvents::increment(ProfileEvents::DiskS3CopyObject); - auto outcome = 
client_ptr->CopyObject(request); + auto outcome = client_with_long_timeout_ptr->CopyObject(request); if (outcome.IsSuccess()) { LOG_TRACE( @@ -706,6 +711,7 @@ namespace offset, size, client_ptr, + client_with_long_timeout_ptr, dest_bucket, dest_key, request_settings, @@ -779,11 +785,11 @@ namespace if (for_disk_s3) ProfileEvents::increment(ProfileEvents::DiskS3UploadPartCopy); - auto outcome = client_ptr->UploadPartCopy(req); + auto outcome = client_with_long_timeout_ptr->UploadPartCopy(req); if (!outcome.IsSuccess()) { abortMultipartUpload(); - throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } return outcome.GetResult().GetCopyPartResult().GetETag(); @@ -797,6 +803,7 @@ void copyDataToS3File( size_t offset, size_t size, const std::shared_ptr & dest_s3_client, + const std::shared_ptr & dest_s3_client_with_long_timeout, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, @@ -804,13 +811,14 @@ void copyDataToS3File( ThreadPoolCallbackRunner schedule, bool for_disk_s3) { - CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; + CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_s3_client_with_long_timeout, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; helper.performCopy(); } void copyS3File( const std::shared_ptr & s3_client, + const std::shared_ptr & s3_client_with_long_timeout, const String & src_bucket, const String & src_key, size_t src_offset, @@ -824,7 +832,7 @@ void copyS3File( { if (settings.allow_native_copy) { - CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; + CopyFileHelper helper{s3_client, s3_client_with_long_timeout, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; helper.performCopy(); } else @@ -833,7 +841,7 @@ void copyS3File( { return std::make_unique(s3_client, src_bucket, src_key, "", settings, Context::getGlobalContextInstance()->getReadSettings()); }; - copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, s3_client_with_long_timeout, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); } } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 5d35e5ebe2d..3477f5a20ab 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -26,8 +26,14 @@ using CreateReadBuffer = std::function()>; /// has been disabled (with settings.allow_native_copy) or request failed /// because it is a known issue, it is fallbacks to read-write copy /// (copyDataToS3File()). +/// +/// s3_client_with_long_timeout (may be equal to s3_client) is used for native copy and +/// CompleteMultipartUpload requests. These requests need longer timeout because S3 servers often +/// block on them for multiple seconds without sending or receiving data from us (maybe the servers +/// are copying data internally, or maybe throttling, idk). 
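The new `client_with_long_timeout` parameter exists because server-side copies and CompleteMultipartUpload can block for several seconds before any data flows. A sketch of the routing this implies (`Client` and `Operation` are placeholders, and the two pointers may refer to the same client):

```
#include <chrono>
#include <memory>

struct Client
{
    std::chrono::seconds request_timeout; /// e.g. the long-timeout client is configured with a larger value
};

enum class Operation { GetObject, PutObject, UploadPart, CopyObject, UploadPartCopy, CompleteMultipartUpload };

/// Server-side copy and CompleteMultipartUpload may stall while the storage copies
/// data internally, so they go through the long-timeout client; everything else
/// keeps the regular one.
std::shared_ptr<Client> pickClient(
    Operation op,
    const std::shared_ptr<Client> & client,
    const std::shared_ptr<Client> & client_with_long_timeout)
{
    switch (op)
    {
        case Operation::CopyObject:
        case Operation::UploadPartCopy:
        case Operation::CompleteMultipartUpload:
            return client_with_long_timeout;
        default:
            return client;
    }
}
```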
void copyS3File( const std::shared_ptr & s3_client, + const std::shared_ptr & s3_client_with_long_timeout, const String & src_bucket, const String & src_key, size_t src_offset, @@ -49,6 +55,7 @@ void copyDataToS3File( size_t offset, size_t size, const std::shared_ptr & dest_s3_client, + const std::shared_ptr & dest_s3_client_with_long_timeout, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index c652f16ab20..88f79f8d8d5 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -85,7 +85,7 @@ ObjectInfo getObjectInfo( } else if (throw_on_error) { - throw DB::Exception(ErrorCodes::S3_ERROR, + throw S3Exception(error.GetErrorType(), "Failed to get object info: {}. HTTP response code: {}", error.GetMessage(), static_cast(error.GetResponseCode())); } diff --git a/src/IO/S3/getObjectInfo.h b/src/IO/S3/getObjectInfo.h index 06c7d386e43..a57d807644b 100644 --- a/src/IO/S3/getObjectInfo.h +++ b/src/IO/S3/getObjectInfo.h @@ -16,7 +16,7 @@ struct ObjectInfo size_t size = 0; time_t last_modification_time = 0; - std::map metadata; /// Set only if getObjectInfo() is called with `with_metadata = true`. + std::map metadata = {}; /// Set only if getObjectInfo() is called with `with_metadata = true`. }; ObjectInfo getObjectInfo( diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 5731e9061d6..6d589bcedd5 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -26,9 +26,19 @@ #include #include #include +#include #include "TestPocoHTTPServer.h" +/* + * When all tests are executed together, `Context::getGlobalContextInstance()` is not null. Global context is used by + * ProxyResolvers to get proxy configuration (used by S3 clients). If global context does not have a valid ConfigRef, it relies on + * Poco::Util::Application::instance() to grab the config. However, at this point, the application is not yet initialized and + * `Poco::Util::Application::instance()` returns nullptr. This causes the test to fail. To fix this, we create a dummy application that takes + * care of initialization. 
+ * */ +[[maybe_unused]] static Poco::Util::ServerApplication app; + class NoRetryStrategy : public Aws::Client::StandardRetryStrategy { @@ -125,7 +135,8 @@ void testServerSideEncryption( enable_s3_requests_logging, /* for_disk_s3 = */ false, /* get_request_throttler = */ {}, - /* put_request_throttler = */ {} + /* put_request_throttler = */ {}, + uri.uri.getScheme() ); client_configuration.endpointOverride = uri.endpoint; diff --git a/src/IO/SchedulerRoot.h b/src/IO/SchedulerRoot.h index f9af2099b8c..748632615bc 100644 --- a/src/IO/SchedulerRoot.h +++ b/src/IO/SchedulerRoot.h @@ -97,6 +97,8 @@ public: bool equals(ISchedulerNode * other) override { + if (!ISchedulerNode::equals(other)) + return false; if (auto * o = dynamic_cast(other)) return true; return false; @@ -156,6 +158,8 @@ public: else current = current->next; // Just move round-robin pointer + dequeued_requests++; + dequeued_cost += request->cost; return {request, current != nullptr}; } @@ -164,6 +168,11 @@ public: return current != nullptr; } + size_t activeChildren() override + { + return 0; + } + void activateChild(ISchedulerNode * child) override { activate(TResource::get(child->info)); @@ -205,6 +214,7 @@ private: value->next = nullptr; value->prev = nullptr; current = nullptr; + busy_periods++; return; } else // Just move current to next to avoid invalidation diff --git a/src/IO/SeekableReadBuffer.cpp b/src/IO/SeekableReadBuffer.cpp index b83e382db01..5d83f4e1b4a 100644 --- a/src/IO/SeekableReadBuffer.cpp +++ b/src/IO/SeekableReadBuffer.cpp @@ -64,7 +64,7 @@ std::unique_ptr wrapSeekableReadBufferPointer(SeekableReadBu return std::make_unique>(*ptr, SeekableReadBufferPtr{ptr}); } -size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function & progress_callback, bool * out_cancelled) +void copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function & progress_callback, size_t * out_bytes_copied, bool * out_cancelled) { const size_t chunk = DBMS_DEFAULT_BUFFER_SIZE; if (out_cancelled) @@ -82,6 +82,7 @@ size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_ bool cancelled = false; if (gcount && progress_callback) cancelled = progress_callback(copied); + *out_bytes_copied = copied; if (gcount != to_copy) { @@ -103,7 +104,7 @@ size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_ } } - return copied; + *out_bytes_copied = copied; } } diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index 5770948be20..e21513e0ea2 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -98,6 +98,7 @@ std::unique_ptr wrapSeekableReadBufferReference(SeekableRead std::unique_ptr wrapSeekableReadBufferPointer(SeekableReadBufferPtr ptr); /// Helper for implementing readBigAt(). -size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function & progress_callback, bool * out_cancelled = nullptr); +/// Updates *out_bytes_copied after each call to the callback, as well as at the end. 
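The copy helper now reports progress through `*out_bytes_copied` after every chunk, so a caller still sees the partial count when the stream throws mid-copy. A standalone sketch of that contract; the 1 MiB chunk size and the true-means-cancel callback mirror the original's behaviour, but the names are illustrative:

```
#include <algorithm>
#include <cstddef>
#include <functional>
#include <istream>
#include <stdexcept>

/// Copies up to `n` bytes from `istr` into `to` in fixed-size chunks. The running
/// total is written to *out_bytes_copied after every chunk, so the caller still
/// knows how much arrived if an exception interrupts the copy (this is what lets
/// readBigAt() resume instead of re-reading).
void copyWithProgress(std::istream & istr, char * to, size_t n,
                      const std::function<bool(size_t)> & progress_callback,
                      size_t * out_bytes_copied)
{
    constexpr size_t chunk = 1 << 20; /// stand-in for DBMS_DEFAULT_BUFFER_SIZE
    size_t copied = 0;
    *out_bytes_copied = 0;
    while (copied < n)
    {
        size_t to_copy = std::min(n - copied, chunk);
        istr.read(to + copied, static_cast<std::streamsize>(to_copy));
        size_t gcount = static_cast<size_t>(istr.gcount());
        copied += gcount;
        *out_bytes_copied = copied;

        if (gcount && progress_callback && progress_callback(copied))
            return; /// cancelled by the callback

        if (gcount != to_copy)
        {
            if (istr.eof())
                return; /// stream ended before `n` bytes
            throw std::runtime_error("error while reading from stream");
        }
    }
}
```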
+void copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function & progress_callback, size_t * out_bytes_copied, bool * out_cancelled = nullptr); } diff --git a/src/IO/StdStreamFromReadBuffer.h b/src/IO/StdStreamFromReadBuffer.h index eae939a28b5..ff327dc342e 100644 --- a/src/IO/StdStreamFromReadBuffer.h +++ b/src/IO/StdStreamFromReadBuffer.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include diff --git a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp index e1c654e48a3..35dee762370 100644 --- a/src/IO/SynchronousReader.cpp +++ b/src/IO/SynchronousReader.cpp @@ -39,51 +39,56 @@ std::future SynchronousReader::submit(Request reque /// If size is zero, then read() cannot be distinguished from EOF assert(request.size); - int fd = assert_cast(*request.descriptor).fd; - #if defined(POSIX_FADV_WILLNEED) + int fd = assert_cast(*request.descriptor).fd; if (0 != posix_fadvise(fd, request.offset, request.size, POSIX_FADV_WILLNEED)) throwFromErrno("Cannot posix_fadvise", ErrorCodes::CANNOT_ADVISE); #endif - return std::async(std::launch::deferred, [fd, request] + return std::async(std::launch::deferred, [request, this] { - ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorRead); - Stopwatch watch(CLOCK_MONOTONIC); - - size_t bytes_read = 0; - while (!bytes_read) - { - ssize_t res = 0; - - { - CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; - res = ::pread(fd, request.buf, request.size, request.offset); - } - if (!res) - break; - - if (-1 == res && errno != EINTR) - { - ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - throwFromErrno(fmt::format("Cannot read from file {}", fd), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - - if (res > 0) - bytes_read += res; - } - - ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); - - /// It reports real time spent including the time spent while thread was preempted doing nothing. - /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). - /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (NetlinkMetricsProvider has about 500K RPS). - watch.stop(); - ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); - - return Result{ .size = bytes_read, .offset = request.ignore }; + return execute(request); }); } +IAsynchronousReader::Result SynchronousReader::execute(Request request) +{ + ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorRead); + Stopwatch watch(CLOCK_MONOTONIC); + + int fd = assert_cast(*request.descriptor).fd; + size_t bytes_read = 0; + while (!bytes_read) + { + ssize_t res = 0; + + { + CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; + res = ::pread(fd, request.buf, request.size, request.offset); + } + if (!res) + break; + + if (-1 == res && errno != EINTR) + { + ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); + throwFromErrno(fmt::format("Cannot read from file {}", fd), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + } + + if (res > 0) + bytes_read += res; + } + + ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); + + /// It reports real time spent including the time spent while thread was preempted doing nothing. 
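`SynchronousReader::execute()` is the synchronous pread loop factored out so it can be called directly as well as through `submit()`. Its core is: retry on EINTR, stop on end of file, fail on any other error. A POSIX-only sketch of just that loop (profile events and timing omitted):

```
#include <cerrno>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <unistd.h>

/// Reads from `fd` at `offset` into `buf`, retrying when pread() is interrupted by a
/// signal (EINTR) and stopping at end of file.
size_t preadRetryingEintr(int fd, char * buf, size_t size, off_t offset)
{
    size_t bytes_read = 0;
    while (!bytes_read)
    {
        ssize_t res = ::pread(fd, buf, size, offset);
        if (res == 0)
            break; /// end of file
        if (res == -1)
        {
            if (errno == EINTR)
                continue; /// interrupted by a signal, just try again
            throw std::runtime_error("Cannot read from file descriptor " + std::to_string(fd));
        }
        bytes_read += static_cast<size_t>(res);
    }
    return bytes_read;
}
```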
+ /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). + /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it + /// (NetlinkMetricsProvider has about 500K RPS). + watch.stop(); + ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + + return Result{ .size = bytes_read, .offset = request.ignore }; +} + } diff --git a/src/IO/SynchronousReader.h b/src/IO/SynchronousReader.h index 238d6e9371e..e6a91c98131 100644 --- a/src/IO/SynchronousReader.h +++ b/src/IO/SynchronousReader.h @@ -14,6 +14,8 @@ class SynchronousReader final : public IAsynchronousReader public: std::future submit(Request request) override; + Result execute(Request request) override; + void wait() override {} }; diff --git a/src/IO/UncompressedCache.h b/src/IO/UncompressedCache.h index 2e654b27ed7..702804cdda3 100644 --- a/src/IO/UncompressedCache.h +++ b/src/IO/UncompressedCache.h @@ -42,23 +42,17 @@ private: using Base = CacheBase; public: - explicit UncompressedCache(size_t max_size_in_bytes) - : Base(max_size_in_bytes) {} - - UncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes) - : Base(uncompressed_cache_policy, max_size_in_bytes) {} + UncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) + : Base(cache_policy, max_size_in_bytes, 0, size_ratio) {} /// Calculate key from path to file and offset. static UInt128 hash(const String & path_to_file, size_t offset) { - UInt128 key; - SipHash hash; hash.update(path_to_file.data(), path_to_file.size() + 1); hash.update(offset); - hash.get128(key); - return key; + return hash.get128(); } template diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 2a2743e3407..8d10055a3df 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index ea032302235..d29ca6d5c6c 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 355c42a23c9..056b965266e 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -13,9 +13,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( const std::string & content_encoding, const HTTPHeaderEntries & additional_headers, const ConnectionTimeouts & timeouts, - size_t buffer_size_) + size_t buffer_size_, + Poco::Net::HTTPClientSession::ProxyConfig proxy_configuration) : WriteBufferFromOStream(buffer_size_) - , session{makeHTTPSession(uri, timeouts)} + , session{makeHTTPSession(uri, timeouts, proxy_configuration)} , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} { request.setHost(uri.getHost()); diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index ce5020dfa78..65dc10213dc 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -25,7 +25,8 @@ public: const std::string & content_encoding = "", const HTTPHeaderEntries & additional_headers = {}, const ConnectionTimeouts & timeouts = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + Poco::Net::HTTPClientSession::ProxyConfig proxy_configuration = {}); private: /// Receives response from the server after sending all data. 
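`UncompressedCache::hash()` derives the cache key from the file path plus the offset inside the file; the real code feeds both into a 128-bit SipHash. A much simpler 64-bit illustration of the same idea (the mixing constant is the usual hash_combine-style one, not what the cache actually uses):

```
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>

/// Cache key for "this offset inside this file": combine the hash of the path with
/// the hash of the offset so different offsets in the same file get distinct keys.
std::uint64_t cacheKey(const std::string & path_to_file, std::size_t offset)
{
    std::uint64_t h = std::hash<std::string>{}(path_to_file);
    h ^= std::hash<std::size_t>{}(offset) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
    return h;
}
```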
diff --git a/src/IO/WriteBufferFromOStream.h b/src/IO/WriteBufferFromOStream.h index 5a933739cb1..3f9d3ee3d92 100644 --- a/src/IO/WriteBufferFromOStream.h +++ b/src/IO/WriteBufferFromOStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index b72bc627220..d611befac37 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef __SSE2__ #include @@ -84,16 +85,13 @@ void WriteBufferValidUTF8::nextImpl() /// Fast skip of ASCII for aarch64. static constexpr size_t SIMD_BYTES = 16; const char * simd_end = p + (pos - p) / SIMD_BYTES * SIMD_BYTES; - /// Returns a 64 bit mask of nibbles (4 bits for each byte). - auto get_nibble_mask = [](uint8x16_t input) -> uint64_t - { return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); }; /// Other options include /// vmaxvq_u8(input) < 0b10000000; /// Used by SIMDJSON, has latency 3 for M1, 6 for everything else /// SIMDJSON uses it for 64 byte masks, so it's a little different. /// vmaxvq_u32(vandq_u32(input, vdupq_n_u32(0x80808080))) // u32 version has latency 3 /// shrn version has universally <=3 cycles, on servers 2 cycles. - while (p < simd_end && get_nibble_mask(vcgeq_u8(vld1q_u8(reinterpret_cast(p)), vdupq_n_u8(0x80))) == 0) + while (p < simd_end && getNibbleMask(vcgeq_u8(vld1q_u8(reinterpret_cast(p)), vdupq_n_u8(0x80))) == 0) p += SIMD_BYTES; if (!(p < pos)) diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index 4f1a95181d4..34eabe55d7f 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -23,30 +23,23 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) std::array formatUUID(const UUID & uuid) { std::array dst; - const auto * src_ptr = reinterpret_cast(&uuid); auto * dst_ptr = dst.data(); + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const std::reverse_iterator src_it(src_ptr + 16); - formatHex(src_it + 8, dst_ptr, 4); - dst[8] = '-'; - formatHex(src_it + 12, dst_ptr + 9, 2); - dst[13] = '-'; - formatHex(src_it + 14, dst_ptr + 14, 2); - dst[18] = '-'; - formatHex(src_it, dst_ptr + 19, 2); - dst[23] = '-'; - formatHex(src_it + 2, dst_ptr + 24, 6); + const auto * src_ptr = reinterpret_cast(&uuid); + const std::reverse_iterator src(src_ptr + 16); #else - formatHex(src_ptr, dst_ptr, 4); - dst[8] = '-'; - formatHex(src_ptr + 4, dst_ptr + 9, 2); - dst[13] = '-'; - formatHex(src_ptr + 6, dst_ptr + 14, 2); - dst[18] = '-'; - formatHex(src_ptr + 8, dst_ptr + 19, 2); - dst[23] = '-'; - formatHex(src_ptr + 10, dst_ptr + 24, 6); + const auto * src = reinterpret_cast(&uuid); #endif + formatHex(src + 8, dst_ptr, 4); + dst[8] = '-'; + formatHex(src + 12, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src + 14, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src + 2, dst_ptr + 24, 6); return dst; } @@ -77,7 +70,7 @@ void writeIPv6Text(const IPv6 & ip, WriteBuffer & buf) void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trace) { - writeBinary(e.code(), buf); + writeBinaryLittleEndian(e.code(), buf); writeBinary(String(e.name()), buf); writeBinary(e.displayText() + getExtraExceptionInfo(e), buf); diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 76778543bd0..02a24aeb01f 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -88,6 +88,13 @@ inline void writePODBinary(const T & x, WriteBuffer & buf) 
buf.write(reinterpret_cast(&x), sizeof(x)); /// NOLINT } +inline void writeUUIDBinary(const UUID & x, WriteBuffer & buf) +{ + const auto & uuid = x.toUnderType(); + writePODBinary(uuid.items[0], buf); + writePODBinary(uuid.items[1], buf); +} + template inline void writeIntBinary(const T & x, WriteBuffer & buf) { @@ -304,9 +311,10 @@ inline void writeJSONString(const char * begin, const char * end, WriteBuffer & /** Will escape quote_character and a list of special characters('\b', '\f', '\n', '\r', '\t', '\0', '\\'). * - when escape_quote_with_quote is true, use backslash to escape list of special characters, * and use quote_character to escape quote_character. such as: 'hello''world' - * - otherwise use backslash to escape list of special characters and quote_character + * otherwise use backslash to escape list of special characters and quote_character + * - when escape_backslash_with_backslash is true, backslash is escaped with another backslash */ -template +template void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & buf) { const char * pos = begin; @@ -360,7 +368,8 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b writeChar('0', buf); break; case '\\': - writeChar('\\', buf); + if constexpr (escape_backslash_with_backslash) + writeChar('\\', buf); writeChar('\\', buf); break; default: @@ -371,6 +380,146 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b } } +/// Define special characters in Markdown according to the standards specified by CommonMark. +inline void writeAnyMarkdownEscapedString(const char * begin, const char * end, WriteBuffer & buf) +{ + for (const char * it = begin; it != end; ++it) + { + switch (*it) + { + case '!': + writeChar('\\', buf); + writeChar('!', buf); + break; + case '"': + writeChar('\\', buf); + writeChar('"', buf); + break; + case '#': + writeChar('\\', buf); + writeChar('#', buf); + break; + case '$': + writeChar('\\', buf); + writeChar('$', buf); + break; + case '%': + writeChar('\\', buf); + writeChar('%', buf); + break; + case '&': + writeChar('\\', buf); + writeChar('&', buf); + break; + case '\'': + writeChar('\\', buf); + writeChar('\'', buf); + break; + case '(': + writeChar('\\', buf); + writeChar('(', buf); + break; + case ')': + writeChar('\\', buf); + writeChar(')', buf); + break; + case '*': + writeChar('\\', buf); + writeChar('*', buf); + break; + case '+': + writeChar('\\', buf); + writeChar('+', buf); + break; + case ',': + writeChar('\\', buf); + writeChar(',', buf); + break; + case '-': + writeChar('\\', buf); + writeChar('-', buf); + break; + case '.': + writeChar('\\', buf); + writeChar('.', buf); + break; + case '/': + writeChar('\\', buf); + writeChar('/', buf); + break; + case ':': + writeChar('\\', buf); + writeChar(':', buf); + break; + case ';': + writeChar('\\', buf); + writeChar(';', buf); + break; + case '<': + writeChar('\\', buf); + writeChar('<', buf); + break; + case '=': + writeChar('\\', buf); + writeChar('=', buf); + break; + case '>': + writeChar('\\', buf); + writeChar('>', buf); + break; + case '?': + writeChar('\\', buf); + writeChar('?', buf); + break; + case '@': + writeChar('\\', buf); + writeChar('@', buf); + break; + case '[': + writeChar('\\', buf); + writeChar('[', buf); + break; + case '\\': + writeChar('\\', buf); + writeChar('\\', buf); + break; + case ']': + writeChar('\\', buf); + writeChar(']', buf); + break; + case '^': + writeChar('\\', buf); + writeChar('^', buf); + break; + case '_': + writeChar('\\', 
buf); + writeChar('_', buf); + break; + case '`': + writeChar('\\', buf); + writeChar('`', buf); + break; + case '{': + writeChar('\\', buf); + writeChar('{', buf); + break; + case '|': + writeChar('\\', buf); + writeChar('|', buf); + break; + case '}': + writeChar('\\', buf); + writeChar('}', buf); + break; + case '~': + writeChar('\\', buf); + writeChar('~', buf); + break; + default: + writeChar(*it, buf); + } + } +} inline void writeJSONString(std::string_view s, WriteBuffer & buf, const FormatSettings & settings) { @@ -435,6 +584,16 @@ inline void writeEscapedString(std::string_view ref, WriteBuffer & buf) writeEscapedString(ref.data(), ref.size(), buf); } +inline void writeMarkdownEscapedString(const char * str, size_t size, WriteBuffer & buf) +{ + writeAnyMarkdownEscapedString(str, str + size, buf); +} + +inline void writeMarkdownEscapedString(std::string_view ref, WriteBuffer & buf) +{ + writeMarkdownEscapedString(ref.data(), ref.size(), buf); +} + template void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & buf) { @@ -466,6 +625,13 @@ inline void writeQuotedString(std::string_view ref, WriteBuffer & buf) writeAnyQuotedString<'\''>(ref.data(), ref.data() + ref.size(), buf); } +inline void writeQuotedStringPostgreSQL(std::string_view ref, WriteBuffer & buf) +{ + writeChar('\'', buf); + writeAnyEscapedString<'\'', true, false>(ref.data(), ref.data() + ref.size(), buf); + writeChar('\'', buf); +} + inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf) { writeAnyQuotedString<'"'>(s, buf); @@ -873,10 +1039,20 @@ inline void writeBinary(const Decimal128 & x, WriteBuffer & buf) { writePODBinar inline void writeBinary(const Decimal256 & x, WriteBuffer & buf) { writePODBinary(x.value, buf); } inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const UUID & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const IPv4 & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const IPv6 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const UUID & x, WriteBuffer & buf) +{ + writeUUIDBinary(x, buf); +} + +inline void writeBinary(const CityHash_v1_0_2::uint128 & x, WriteBuffer & buf) +{ + writePODBinary(x.low64, buf); + writePODBinary(x.high64, buf); +} + inline void writeBinary(const StackTrace::FramePointers & x, WriteBuffer & buf) { writePODBinary(x, buf); } /// Methods for outputting the value in text form for a tab-separated format. @@ -905,26 +1081,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf) inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); } template -void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) +void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros, + bool fixed_fractional_length, UInt32 fractional_length) { /// If it's big integer, but the number of digits is small, /// use the implementation for smaller integers for more efficient arithmetic. 
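Aside: the hunk below extends `writeDecimalFractional` with a `fixed_fractional_length` mode that rounds the fraction to a requested number of digits instead of always printing up to `scale`. A minimal standalone sketch of that rounding idea, with illustrative names only (plain `uint64_t` and `std::string` stand-ins, not ClickHouse's decimal or `WriteBuffer` types):

```cpp
#include <cstdint>
#include <iostream>
#include <string>

/// Illustrative sketch only: print the fractional part of a scaled integer,
/// optionally rounded to a fixed number of fractional digits.
/// `fraction` holds the fractional digits of a decimal with the given `scale`,
/// e.g. fraction = 456, scale = 3 means ".456".
std::string formatFraction(uint64_t fraction, uint32_t scale, bool fixed_length = false, uint32_t length = 0)
{
    uint32_t digits = scale;
    if (fixed_length && length < scale)
    {
        /// Keep one extra digit to decide whether to round the last kept digit up.
        uint64_t divisor = 1;
        for (uint32_t i = 0; i + 1 + length < scale; ++i)
            divisor *= 10;
        fraction /= divisor;
        uint64_t round_digit = fraction % 10;
        fraction /= 10;
        if (round_digit >= 5)
            ++fraction; /// NB: the carry may overflow the kept digits (e.g. .96 -> 1.0); not propagated here.
        digits = length;
    }

    std::string out(digits, '0');
    for (uint32_t pos = digits; pos > 0 && fraction != 0; --pos)
    {
        out[pos - 1] = static_cast<char>('0' + fraction % 10);
        fraction /= 10;
    }
    return "." + out;
}

int main()
{
    std::cout << formatFraction(12345, 5) << '\n';          /// .12345
    std::cout << formatFraction(12345, 5, true, 2) << '\n'; /// .12
    std::cout << formatFraction(12567, 5, true, 2) << '\n'; /// .13
}
```

The sketch keeps one extra digit past the requested length and rounds the last kept digit up when that extra digit is 5 or more; a rounding carry that overflows the kept digits is deliberately left unhandled here.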
- if constexpr (std::is_same_v) { if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } } @@ -932,24 +1108,36 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool { if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } } constexpr size_t max_digits = std::numeric_limits::digits10; assert(scale <= max_digits); + assert(fractional_length <= max_digits); + char buf[max_digits]; - memset(buf, '0', scale); + memset(buf, '0', std::max(scale, fractional_length)); T value = x; Int32 last_nonzero_pos = 0; - for (Int32 pos = scale - 1; pos >= 0; --pos) + + if (fixed_fractional_length && fractional_length < scale) + { + T new_value = value / DecimalUtils::scaleMultiplier(scale - fractional_length - 1); + auto round_carry = new_value % 10; + value = new_value / 10; + if (round_carry >= 5) + value += 1; + } + + for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos) { auto remainder = value % 10; value /= 10; @@ -961,11 +1149,12 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool } writeChar('.', ostr); - ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1); + ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? 
scale : last_nonzero_pos + 1)); } template -void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) +void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros, + bool fixed_fractional_length = false, UInt32 fractional_length = 0) { T part = DecimalUtils::getWholePart(x, scale); @@ -976,7 +1165,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer writeIntText(part, ostr); - if (scale) + if (scale || (fixed_fractional_length && fractional_length > 0)) { part = DecimalUtils::getFractionalPart(x, scale); if (part || trailing_zeros) @@ -984,7 +1173,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer if (part < 0) part *= T(-1); - writeDecimalFractional(part, scale, ostr, trailing_zeros); + writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); } } } @@ -1148,6 +1337,15 @@ inline String toString(const T & x) return buf.str(); } +inline String toString(const CityHash_v1_0_2::uint128 & hash) +{ + WriteBufferFromOwnString buf; + writeText(hash.low64, buf); + writeChar('_', buf); + writeText(hash.high64, buf); + return buf.str(); +} + template inline String toStringWithFinalSeparator(const std::vector & x, const String & final_sep) { @@ -1177,7 +1375,7 @@ template inline void writeBinaryEndian(T x, WriteBuffer & buf) { transformEndianness(x); - writePODBinary(x, buf); + writeBinary(x, buf); } template diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index 8fec5c5fadb..83d8487e3e7 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -32,13 +32,8 @@ ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer( ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer() = default; -void ZstdDeflatingWriteBuffer::nextImpl() +void ZstdDeflatingWriteBuffer::flush(ZSTD_EndDirective mode) { - if (!offset()) - return; - - ZSTD_EndDirective mode = ZSTD_e_flush; - input.src = reinterpret_cast(working_buffer.begin()); input.size = offset(); input.pos = 0; @@ -54,7 +49,6 @@ void ZstdDeflatingWriteBuffer::nextImpl() output.size = out->buffer().size(); output.pos = out->offset(); - size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, mode); if (ZSTD_isError(compression_result)) throw Exception( @@ -78,24 +72,15 @@ void ZstdDeflatingWriteBuffer::nextImpl() } } +void ZstdDeflatingWriteBuffer::nextImpl() +{ + if (offset()) + flush(ZSTD_e_flush); +} + void ZstdDeflatingWriteBuffer::finalizeBefore() { - next(); - - out->nextIfAtEnd(); - - input.src = reinterpret_cast(working_buffer.begin()); - input.size = offset(); - input.pos = 0; - - output.dst = reinterpret_cast(out->buffer().begin()); - output.size = out->buffer().size(); - output.pos = out->offset(); - - size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end); - if (ZSTD_isError(remaining)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder end failed: zstd version: {}", ZSTD_VERSION_STRING); - out->position() = out->buffer().begin() + output.pos; + flush(ZSTD_e_end); } void ZstdDeflatingWriteBuffer::finalizeAfter() diff --git a/src/IO/ZstdDeflatingWriteBuffer.h b/src/IO/ZstdDeflatingWriteBuffer.h index ba83c18d354..a66d6085a74 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.h +++ b/src/IO/ZstdDeflatingWriteBuffer.h @@ -37,6 +37,8 @@ private: void finalizeBefore() override; void finalizeAfter() override; + void flush(ZSTD_EndDirective mode); + ZSTD_CCtx * cctx; ZSTD_inBuffer input; 
ZSTD_outBuffer output; diff --git a/src/IO/examples/read_buffer.cpp b/src/IO/examples/read_buffer.cpp index 85675c0d613..221da24715b 100644 --- a/src/IO/examples/read_buffer.cpp +++ b/src/IO/examples/read_buffer.cpp @@ -40,7 +40,7 @@ int readAndPrint(DB::ReadBuffer & in) int main(int, char **) { { - std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'"; + std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'"; DB::ReadBufferFromString in(s); if (readAndPrint(in)) std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl; @@ -49,7 +49,7 @@ int main(int, char **) std::shared_ptr in; { - std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'"; + std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'"; in = std::make_shared(s); } if (readAndPrint(*in)) diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp index da4e5298681..977dd2ae227 100644 --- a/src/IO/examples/read_buffer_from_hdfs.cpp +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/src/IO/examples/write_buffer.cpp b/src/IO/examples/write_buffer.cpp index bca0be24b1a..999f9b1bb34 100644 --- a/src/IO/examples/write_buffer.cpp +++ b/src/IO/examples/write_buffer.cpp @@ -14,7 +14,7 @@ int main(int, char **) { DB::Int64 a = -123456; DB::Float64 b = 123.456; - DB::String c = "вася пе\tтя"; + DB::String c = "вася pe\ttya"; DB::String d = "'xyz\\"; std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM diff --git a/src/IO/examples/write_buffer_perf.cpp b/src/IO/examples/write_buffer_perf.cpp index 0b3d0a61241..3f57ddb9a4f 100644 --- a/src/IO/examples/write_buffer_perf.cpp +++ b/src/IO/examples/write_buffer_perf.cpp @@ -14,7 +14,7 @@ int main(int, char **) { DB::Int64 a = -123456; DB::Float64 b = 123.456; - DB::String c = "вася пе\tтя"; + DB::String c = "вася pe\ttya"; DB::String d = "'xyz\\"; std::ofstream s("test"); diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index da4719b8dcb..feab9589c2e 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -238,8 +238,15 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) ++num_copied_chars; } - auto res = fast_float::from_chars(tmp_buf, tmp_buf + num_copied_chars, x); - + fast_float::from_chars_result res; + if constexpr (std::endian::native == std::endian::little) + res = fast_float::from_chars(tmp_buf, tmp_buf + num_copied_chars, x); + else + { + Float64 x64 = 0.0; + res = fast_float::from_chars(tmp_buf, tmp_buf + num_copied_chars, x64); + x = static_cast(x64); + } if (unlikely(res.ec != std::errc())) { if constexpr (throw_exception) diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index 3bc9d670f05..b48955c25e7 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -1,6 +1,7 @@ #include #include "config.h" +#include #include #include #include @@ -19,11 +20,52 @@ namespace DB::ErrorCodes { extern const int CANNOT_UNPACK_ARCHIVE; + extern const int LOGICAL_ERROR; } namespace fs = std::filesystem; using namespace DB; +enum class ArchiveType : uint8_t +{ + Tar, + SevenZip +}; + +template +bool createArchiveWithFiles(const std::string & archivename, const std::map & files) +{ + struct archive * a; + struct archive_entry * entry; + + a = archive_write_new(); + + if constexpr (archive_type == ArchiveType::Tar) + archive_write_set_format_pax_restricted(a); + else if constexpr (archive_type 
== ArchiveType::SevenZip) + archive_write_set_format_7zip(a); + else + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Invalid archive type requested: {}", static_cast(archive_type)); + + archive_write_open_filename(a, archivename.c_str()); + + for (const auto & [filename, content] : files) { + entry = archive_entry_new(); + archive_entry_set_pathname(entry, filename.c_str()); + archive_entry_set_size(entry, content.size()); + archive_entry_set_mode(entry, S_IFREG | 0644); // regular file with rw-r--r-- permissions + archive_entry_set_mtime(entry, time(nullptr), 0); + archive_write_header(a, entry); + archive_write_data(a, content.c_str(), content.size()); + archive_entry_free(entry); + } + + archive_write_close(a); + archive_write_free(a); + + return true; + +} class ArchiveReaderAndWriterTest : public ::testing::TestWithParam { @@ -71,11 +113,11 @@ TEST_P(ArchiveReaderAndWriterTest, EmptyArchive) EXPECT_FALSE(reader->fileExists("nofile.txt")); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found", + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive", [&]{ reader->getFileInfo("nofile.txt"); }); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found", - [&]{ reader->readFile("nofile.txt"); }); + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive", + [&]{ reader->readFile("nofile.txt", /*throw_on_not_found=*/true); }); EXPECT_EQ(reader->firstFile(), nullptr); } @@ -103,7 +145,7 @@ TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive) EXPECT_GT(file_info.compressed_size, 0); { - auto in = reader->readFile("a.txt"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, contents); @@ -173,14 +215,14 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive) EXPECT_EQ(reader->getFileInfo("b/c.txt").uncompressed_size, c_contents.size()); { - auto in = reader->readFile("a.txt"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, a_contents); } { - auto in = reader->readFile("b/c.txt"); + auto in = reader->readFile("b/c.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, c_contents); @@ -188,7 +230,7 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive) { /// Read a.txt again. - auto in = reader->readFile("a.txt"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, a_contents); @@ -260,14 +302,14 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory) EXPECT_EQ(reader->getFileInfo("b.txt").uncompressed_size, b_contents.size()); { - auto in = reader->readFile("a.txt"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, a_contents); } { - auto in = reader->readFile("b.txt"); + auto in = reader->readFile("b.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, b_contents); @@ -275,7 +317,7 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory) { /// Read a.txt again. - auto in = reader->readFile("a.txt"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, a_contents); @@ -301,19 +343,19 @@ TEST_P(ArchiveReaderAndWriterTest, Password) /// Try to read without a password. 
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required", - [&]{ reader->readFile("a.txt"); }); + [&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); }); { /// Try to read with a wrong password. reader->setPassword("123Qwe"); expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password", - [&]{ reader->readFile("a.txt"); }); + [&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); }); } { /// Reading with the right password is successful. reader->setPassword("Qwe123"); - auto in = reader->readFile("a.txt"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); String str; readStringUntilEOF(str, *in); EXPECT_EQ(str, contents); @@ -327,6 +369,127 @@ TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist) [&]{ createArchiveReader(getPathToArchive()); }); } +TEST(TarArchiveReaderTest, FileExists) { + String archive_path = "archive.tar"; + String filename = "file.txt"; + String contents = "test"; + bool created = createArchiveWithFiles(archive_path, {{filename, contents}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + EXPECT_EQ(reader->fileExists(filename), true); + fs::remove(archive_path); +} + +TEST(TarArchiveReaderTest, ReadFile) { + String archive_path = "archive.tar"; + String filename = "file.txt"; + String contents = "test"; + bool created = createArchiveWithFiles(archive_path, {{filename, contents}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + auto in = reader->readFile(filename, /*throw_on_not_found=*/true); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + fs::remove(archive_path); +} + +TEST(TarArchiveReaderTest, ReadTwoFiles) { + String archive_path = "archive.tar"; + String file1 = "file1.txt"; + String contents1 = "test1"; + String file2 = "file2.txt"; + String contents2 = "test2"; + bool created = createArchiveWithFiles(archive_path, {{file1, contents1}, {file2, contents2}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + EXPECT_EQ(reader->fileExists(file1), true); + EXPECT_EQ(reader->fileExists(file2), true); + auto in = reader->readFile(file1, /*throw_on_not_found=*/true); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents1); + in = reader->readFile(file2, /*throw_on_not_found=*/true); + + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents2); + fs::remove(archive_path); +} + + +TEST(TarArchiveReaderTest, CheckFileInfo) { + String archive_path = "archive.tar"; + String filename = "file.txt"; + String contents = "test"; + bool created = createArchiveWithFiles(archive_path, {{filename, contents}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + auto info = reader->getFileInfo(filename); + EXPECT_EQ(info.uncompressed_size, contents.size()); + EXPECT_GT(info.compressed_size, 0); + fs::remove(archive_path); +} + +TEST(SevenZipArchiveReaderTest, FileExists) { + String archive_path = "archive.7z"; + String filename = "file.txt"; + String contents = "test"; + bool created = createArchiveWithFiles(archive_path, {{filename, contents}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + EXPECT_EQ(reader->fileExists(filename), true); + fs::remove(archive_path); +} + +TEST(SevenZipArchiveReaderTest, ReadFile) { + String archive_path = "archive.7z"; + String filename = "file.txt"; + String contents = "test"; + bool created = createArchiveWithFiles(archive_path, {{filename, contents}}); + EXPECT_EQ(created, true); + auto 
reader = createArchiveReader(archive_path); + auto in = reader->readFile(filename, /*throw_on_not_found=*/true); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + fs::remove(archive_path); +} + +TEST(SevenZipArchiveReaderTest, CheckFileInfo) { + String archive_path = "archive.7z"; + String filename = "file.txt"; + String contents = "test"; + bool created = createArchiveWithFiles(archive_path, {{filename, contents}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + auto info = reader->getFileInfo(filename); + EXPECT_EQ(info.uncompressed_size, contents.size()); + EXPECT_GT(info.compressed_size, 0); + fs::remove(archive_path); +} + +TEST(SevenZipArchiveReaderTest, ReadTwoFiles) { + String archive_path = "archive.7z"; + String file1 = "file1.txt"; + String contents1 = "test1"; + String file2 = "file2.txt"; + String contents2 = "test2"; + bool created = createArchiveWithFiles(archive_path, {{file1, contents1}, {file2, contents2}}); + EXPECT_EQ(created, true); + auto reader = createArchiveReader(archive_path); + EXPECT_EQ(reader->fileExists(file1), true); + EXPECT_EQ(reader->fileExists(file2), true); + auto in = reader->readFile(file1, /*throw_on_not_found=*/true); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents1); + in = reader->readFile(file2, /*throw_on_not_found=*/true); + + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents2); + fs::remove(archive_path); +} + #if USE_MINIZIP @@ -334,7 +497,7 @@ namespace { const char * supported_archive_file_exts[] = { - ".zip", + ".zip" }; } diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index e26b08e1bfd..d14893c4f3e 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -23,11 +23,21 @@ #include #include +#include +#include +#include +#include +#include +#include + +#include +#include +#include #include -#include #include + namespace DB { @@ -258,10 +268,22 @@ struct Client : DB::S3::Client ++counters.getObject; auto & bStore = store->GetBucketStore(request.GetBucket()); + const String data = bStore.objects[request.GetKey()]; + + size_t begin = 0; + size_t end = data.size() - 1; + + const String & range = request.GetRange(); + const String prefix = "bytes="; + if (range.starts_with(prefix)) + { + int ret = sscanf(range.c_str(), "bytes=%zu-%zu", &begin, &end); /// NOLINT + chassert(ret == 2); + } auto factory = request.GetResponseStreamFactory(); Aws::Utils::Stream::ResponseStream responseStream(factory); - responseStream.GetUnderlyingStream() << std::stringstream(bStore.objects[request.GetKey()]).rdbuf(); + responseStream.GetUnderlyingStream() << std::stringstream(data.substr(begin, end - begin + 1)).rdbuf(); Aws::AmazonWebServiceResult awsStream(std::move(responseStream), Aws::Http::HeaderValueCollection()); Aws::S3::Model::GetObjectResult getObjectResult(std::move(awsStream)); @@ -1148,4 +1170,108 @@ TEST_P(SyncAsync, StrictUploadPartSize) { } } +String fillStringWithPattern(String pattern, int n) +{ + String data; + for (int i = 0; i < n; ++i) + { + data += pattern; + } + return data; +} + +TEST_F(WBS3Test, ReadBeyondLastOffset) { + const String remote_file = "ReadBeyondLastOffset"; + + const String key = "1234567812345678"; + const String data = fillStringWithPattern("0123456789", 10); + + ReadSettings disk_read_settings; + disk_read_settings.enable_filesystem_cache = false; + disk_read_settings.local_fs_buffer_size = 70; + disk_read_settings.remote_fs_buffer_size = 
FileEncryption::Header::kSize + 60; + + { + /// write encrypted file + + FileEncryption::Header header; + header.algorithm = FileEncryption::Algorithm::AES_128_CTR; + header.key_fingerprint = FileEncryption::calculateKeyFingerprint(key); + header.init_vector = FileEncryption::InitVector::random(); + + auto wbs3 = getWriteBuffer(remote_file); + getAsyncPolicy().setAutoExecute(true); + + WriteBufferFromEncryptedFile wb(10, std::move(wbs3), key, header); + wb.write(data.data(), data.size()); + wb.finalize(); + } + + auto reader = std::make_unique(1, 1); + std::unique_ptr encrypted_read_buffer; + + { + /// create encrypted file reader + + auto cache_log = std::shared_ptr(); + const StoredObjects objects = { StoredObject(remote_file, data.size() + FileEncryption::Header::kSize) }; + auto async_read_counters = std::make_shared(); + auto prefetch_log = std::shared_ptr(); + + auto rb_creator = [this, disk_read_settings] (const std::string & path, size_t read_until_position) -> std::unique_ptr + { + S3Settings::RequestSettings request_settings; + return std::make_unique( + client, + bucket, + path, + "Latest", + request_settings, + disk_read_settings, + /* use_external_buffer */true, + /* offset */0, + read_until_position, + /* restricted_seek */true); + }; + + auto rb_remote_fs = std::make_unique( + std::move(rb_creator), + objects, + disk_read_settings, + cache_log, + true); + + auto rb_async = std::make_unique( + std::move(rb_remote_fs), *reader, disk_read_settings, async_read_counters, prefetch_log); + + /// read the header from the buffer + /// as a result AsynchronousBoundedReadBuffer consists some data from the file inside working buffer + FileEncryption::Header header; + header.read(*rb_async); + + ASSERT_EQ(rb_async->available(), disk_read_settings.remote_fs_buffer_size - FileEncryption::Header::kSize); + ASSERT_EQ(rb_async->getPosition(), FileEncryption::Header::kSize); + ASSERT_EQ(rb_async->getFileOffsetOfBufferEnd(), disk_read_settings.remote_fs_buffer_size); + + /// ReadBufferFromEncryptedFile is constructed over a ReadBuffer which was already in use. + /// The 'FileEncryption::Header' has been read from `rb_async`. + /// 'rb_async' will read the data from `rb_async` working buffer + encrypted_read_buffer = std::make_unique( + disk_read_settings.local_fs_buffer_size, std::move(rb_async), key, header); + } + + /// When header is read, file is read into working buffer till some position. Tn the test the file is read until remote_fs_buffer_size (124) position. 
+ /// Set the right border before that position and make sure that encrypted_read_buffer does not have access to it + ASSERT_GT(disk_read_settings.remote_fs_buffer_size, 50); + encrypted_read_buffer->setReadUntilPosition(50); + + /// encrypted_read_buffer reads the data with buffer size `local_fs_buffer_size` + /// If the impl file has read the data beyond the ReadUntilPosition, encrypted_read_buffer does not read it + /// getFileOffsetOfBufferEnd should read data till `ReadUntilPosition` + String res; + readStringUntilEOF(res, *encrypted_read_buffer); + ASSERT_EQ(res, data.substr(0, 50)); + ASSERT_TRUE(encrypted_read_buffer->eof()); +} + #endif diff --git a/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp index 83a620d11c6..b62f3a8b0bd 100644 --- a/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp @@ -1,19 +1,26 @@ #include -#include -#include + #include #include #include #include #include +#include +#include +#include #include #include -#include #include - +#include namespace DB { + +namespace ErrorCodes +{ + extern const int ACCESS_ENTITY_ALREADY_EXISTS; +} + namespace { void updateQuotaFromQueryImpl( @@ -76,20 +83,31 @@ namespace BlockIO InterpreterCreateQuotaQuery::execute() { - auto & query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + auto & query = updated_query_ptr->as(); + auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(query.alter ? AccessType::ALTER_QUOTA : AccessType::CREATE_QUOTA); if (!query.cluster.empty()) { query.replaceCurrentUserTag(getContext()->getUserName()); - return executeDDLQueryOnCluster(query_ptr, getContext()); + return executeDDLQueryOnCluster(updated_query_ptr, getContext()); } std::optional roles_from_query; if (query.roles) roles_from_query = RolesOrUsersSet{*query.roles, access_control, getContext()->getUserID()}; + IAccessStorage * storage = &access_control; + MultipleAccessStorage::StoragePtr storage_ptr; + + if (!query.storage_name.empty()) + { + storage_ptr = access_control.getStorageByName(query.storage_name); + storage = storage_ptr.get(); + } + if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr @@ -100,11 +118,11 @@ BlockIO InterpreterCreateQuotaQuery::execute() }; if (query.if_exists) { - auto ids = access_control.find(query.names); - access_control.tryUpdate(ids, update_func); + auto ids = storage->find(query.names); + storage->tryUpdate(ids, update_func); } else - access_control.update(access_control.getIDs(query.names), update_func); + storage->update(storage->getIDs(query.names), update_func); } else { @@ -116,12 +134,21 @@ BlockIO InterpreterCreateQuotaQuery::execute() new_quotas.emplace_back(std::move(new_quota)); } + if (!query.storage_name.empty()) + { + for (const auto & name : query.names) + { + if (auto another_storage_ptr = access_control.findExcludingStorage(AccessEntityType::QUOTA, name, storage_ptr)) + throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Quota {} already exists in storage {}", name, another_storage_ptr->getStorageName()); + } + } + if (query.if_not_exists) - access_control.tryInsert(new_quotas); + storage->tryInsert(new_quotas); else if (query.or_replace) - access_control.insertOrReplace(new_quotas); + storage->insertOrReplace(new_quotas); else - access_control.insert(new_quotas); + storage->insert(new_quotas); } return {}; diff --git 
a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp index 3386dfb8792..fef1f285c8b 100644 --- a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp @@ -1,13 +1,21 @@ #include -#include + #include #include #include #include +#include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int ACCESS_ENTITY_ALREADY_EXISTS; +} + namespace { void updateRoleFromQueryImpl( @@ -33,7 +41,9 @@ namespace BlockIO InterpreterCreateRoleQuery::execute() { - const auto & query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query_ptr->as(); + auto & access_control = getContext()->getAccessControl(); if (query.alter) getContext()->checkAccess(AccessType::ALTER_ROLE); @@ -46,11 +56,20 @@ BlockIO InterpreterCreateRoleQuery::execute() settings_from_query = SettingsProfileElements{*query.settings, access_control}; if (!query.attach) - getContext()->checkSettingsConstraints(*settings_from_query); + getContext()->checkSettingsConstraints(*settings_from_query, SettingSource::ROLE); } if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); + return executeDDLQueryOnCluster(updated_query_ptr, getContext()); + + IAccessStorage * storage = &access_control; + MultipleAccessStorage::StoragePtr storage_ptr; + + if (!query.storage_name.empty()) + { + storage_ptr = access_control.getStorageByName(query.storage_name); + storage = storage_ptr.get(); + } if (query.alter) { @@ -62,11 +81,11 @@ BlockIO InterpreterCreateRoleQuery::execute() }; if (query.if_exists) { - auto ids = access_control.find(query.names); - access_control.tryUpdate(ids, update_func); + auto ids = storage->find(query.names); + storage->tryUpdate(ids, update_func); } else - access_control.update(access_control.getIDs(query.names), update_func); + storage->update(storage->getIDs(query.names), update_func); } else { @@ -78,12 +97,21 @@ BlockIO InterpreterCreateRoleQuery::execute() new_roles.emplace_back(std::move(new_role)); } + if (!query.storage_name.empty()) + { + for (const auto & name : query.names) + { + if (auto another_storage_ptr = access_control.findExcludingStorage(AccessEntityType::ROLE, name, storage_ptr)) + throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Role {} already exists in storage {}", name, another_storage_ptr->getStorageName()); + } + } + if (query.if_not_exists) - access_control.tryInsert(new_roles); + storage->tryInsert(new_roles); else if (query.or_replace) - access_control.insertOrReplace(new_roles); + storage->insertOrReplace(new_roles); else - access_control.insert(new_roles); + storage->insert(new_roles); } return {}; diff --git a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp index 87dc9039c17..e4593222f6d 100644 --- a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp @@ -1,19 +1,27 @@ #include -#include -#include -#include -#include + #include #include #include #include #include #include +#include +#include +#include +#include +#include #include namespace DB { + +namespace ErrorCodes +{ + extern const int ACCESS_ENTITY_ALREADY_EXISTS; +} + namespace { void updateRowPolicyFromQueryImpl( @@ -45,7 +53,8 @@ namespace BlockIO InterpreterCreateRowPolicyQuery::execute() { - auto & query = query_ptr->as(); + const auto 
updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + auto & query = updated_query_ptr->as(); auto required_access = getRequiredAccess(); if (!query.cluster.empty()) @@ -53,7 +62,7 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() query.replaceCurrentUserTag(getContext()->getUserName()); DDLQueryOnClusterParams params; params.access_to_check = std::move(required_access); - return executeDDLQueryOnCluster(query_ptr, getContext(), params); + return executeDDLQueryOnCluster(updated_query_ptr, getContext(), params); } assert(query.names->cluster.empty()); @@ -66,6 +75,16 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() if (query.roles) roles_from_query = RolesOrUsersSet{*query.roles, access_control, getContext()->getUserID()}; + IAccessStorage * storage = &access_control; + MultipleAccessStorage::StoragePtr storage_ptr; + + if (!query.storage_name.empty()) + { + storage_ptr = access_control.getStorageByName(query.storage_name); + storage = storage_ptr.get(); + } + + Strings names = query.names->toStrings(); if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr @@ -74,14 +93,13 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() updateRowPolicyFromQueryImpl(*updated_policy, query, {}, roles_from_query); return updated_policy; }; - Strings names = query.names->toStrings(); if (query.if_exists) { - auto ids = access_control.find(names); - access_control.tryUpdate(ids, update_func); + auto ids = storage->find(names); + storage->tryUpdate(ids, update_func); } else - access_control.update(access_control.getIDs(names), update_func); + storage->update(storage->getIDs(names), update_func); } else { @@ -93,12 +111,21 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() new_policies.emplace_back(std::move(new_policy)); } + if (!query.storage_name.empty()) + { + for (const auto & name : names) + { + if (auto another_storage_ptr = access_control.findExcludingStorage(AccessEntityType::ROW_POLICY, name, storage_ptr)) + throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Row policy {} already exists in storage {}", name, another_storage_ptr->getStorageName()); + } + } + if (query.if_not_exists) - access_control.tryInsert(new_policies); + storage->tryInsert(new_policies); else if (query.or_replace) - access_control.insertOrReplace(new_policies); + storage->insertOrReplace(new_policies); else - access_control.insert(new_policies); + storage->insert(new_policies); } return {}; diff --git a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp index 0727b6f2182..3a96c0a96ff 100644 --- a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp @@ -1,15 +1,23 @@ #include -#include -#include + #include -#include #include +#include #include #include +#include +#include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int ACCESS_ENTITY_ALREADY_EXISTS; +} + namespace { void updateSettingsProfileFromQueryImpl( @@ -41,7 +49,9 @@ namespace BlockIO InterpreterCreateSettingsProfileQuery::execute() { - auto & query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + auto & query = updated_query_ptr->as(); + auto & access_control = getContext()->getAccessControl(); if (query.alter) getContext()->checkAccess(AccessType::ALTER_SETTINGS_PROFILE); @@ -54,19 +64,29 @@ BlockIO 
InterpreterCreateSettingsProfileQuery::execute() settings_from_query = SettingsProfileElements{*query.settings, access_control}; if (!query.attach) - getContext()->checkSettingsConstraints(*settings_from_query); + getContext()->checkSettingsConstraints(*settings_from_query, SettingSource::PROFILE); } if (!query.cluster.empty()) { query.replaceCurrentUserTag(getContext()->getUserName()); - return executeDDLQueryOnCluster(query_ptr, getContext()); + return executeDDLQueryOnCluster(updated_query_ptr, getContext()); } std::optional roles_from_query; if (query.to_roles) roles_from_query = RolesOrUsersSet{*query.to_roles, access_control, getContext()->getUserID()}; + + IAccessStorage * storage = &access_control; + MultipleAccessStorage::StoragePtr storage_ptr; + + if (!query.storage_name.empty()) + { + storage_ptr = access_control.getStorageByName(query.storage_name); + storage = storage_ptr.get(); + } + if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr @@ -77,11 +97,11 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() }; if (query.if_exists) { - auto ids = access_control.find(query.names); - access_control.tryUpdate(ids, update_func); + auto ids = storage->find(query.names); + storage->tryUpdate(ids, update_func); } else - access_control.update(access_control.getIDs(query.names), update_func); + storage->update(storage->getIDs(query.names), update_func); } else { @@ -93,12 +113,21 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() new_profiles.emplace_back(std::move(new_profile)); } + if (!query.storage_name.empty()) + { + for (const auto & name : query.names) + { + if (auto another_storage_ptr = access_control.findExcludingStorage(AccessEntityType::SETTINGS_PROFILE, name, storage_ptr)) + throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Settings profile {} already exists in storage {}", name, another_storage_ptr->getStorageName()); + } + } + if (query.if_not_exists) - access_control.tryInsert(new_profiles); + storage->tryInsert(new_profiles); else if (query.or_replace) - access_control.insertOrReplace(new_profiles); + storage->insertOrReplace(new_profiles); else - access_control.insert(new_profiles); + storage->insert(new_profiles); } return {}; diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 165937560cc..cd4565293ac 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -1,14 +1,18 @@ #include -#include -#include -#include -#include + #include #include +#include #include +#include #include #include #include +#include +#include +#include +#include +#include #include @@ -17,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int ACCESS_ENTITY_ALREADY_EXISTS; } namespace { @@ -104,7 +109,9 @@ namespace BlockIO InterpreterCreateUserQuery::execute() { - const auto & query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query_ptr->as(); + auto & access_control = getContext()->getAccessControl(); auto access = getContext()->getAccess(); access->checkAccess(query.alter ? 
AccessType::ALTER_USER : AccessType::CREATE_USER); @@ -133,12 +140,22 @@ BlockIO InterpreterCreateUserQuery::execute() settings_from_query = SettingsProfileElements{*query.settings, access_control}; if (!query.attach) - getContext()->checkSettingsConstraints(*settings_from_query); + getContext()->checkSettingsConstraints(*settings_from_query, SettingSource::USER); } if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); + return executeDDLQueryOnCluster(updated_query_ptr, getContext()); + IAccessStorage * storage = &access_control; + MultipleAccessStorage::StoragePtr storage_ptr; + + if (!query.storage_name.empty()) + { + storage_ptr = access_control.getStorageByName(query.storage_name); + storage = storage_ptr.get(); + } + + Strings names = query.names->toStrings(); if (query.alter) { std::optional grantees_from_query; @@ -152,14 +169,13 @@ BlockIO InterpreterCreateUserQuery::execute() return updated_user; }; - Strings names = query.names->toStrings(); if (query.if_exists) { - auto ids = access_control.find(names); - access_control.tryUpdate(ids, update_func); + auto ids = storage->find(names); + storage->tryUpdate(ids, update_func); } else - access_control.update(access_control.getIDs(names), update_func); + storage->update(storage->getIDs(names), update_func); } else { @@ -171,13 +187,22 @@ BlockIO InterpreterCreateUserQuery::execute() new_users.emplace_back(std::move(new_user)); } + if (!query.storage_name.empty()) + { + for (const auto & name : names) + { + if (auto another_storage_ptr = access_control.findExcludingStorage(AccessEntityType::USER, name, storage_ptr)) + throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "User {} already exists in storage {}", name, another_storage_ptr->getStorageName()); + } + } + std::vector ids; if (query.if_not_exists) - ids = access_control.tryInsert(new_users); + ids = storage->tryInsert(new_users); else if (query.or_replace) - ids = access_control.insertOrReplace(new_users); + ids = storage->insertOrReplace(new_users); else - ids = access_control.insert(new_users); + ids = storage->insert(new_users); if (query.grantees) { diff --git a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp index d4c37064065..371ed248306 100644 --- a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp @@ -1,11 +1,12 @@ #include -#include -#include + #include #include #include #include - +#include +#include +#include namespace DB { @@ -17,27 +18,37 @@ namespace ErrorCodes BlockIO InterpreterDropAccessEntityQuery::execute() { - auto & query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + auto & query = updated_query_ptr->as(); + auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(getRequiredAccess()); if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); + return executeDDLQueryOnCluster(updated_query_ptr, getContext()); query.replaceEmptyDatabase(getContext()->getCurrentDatabase()); - auto do_drop = [&](const Strings & names) + auto do_drop = [&](const Strings & names, const String & storage_name) { + IAccessStorage * storage = &access_control; + MultipleAccessStorage::StoragePtr storage_ptr; + if (!storage_name.empty()) + { + storage_ptr = access_control.getStorageByName(storage_name); + storage = storage_ptr.get(); + } + if (query.if_exists) - 
access_control.tryRemove(access_control.find(query.type, names)); + storage->tryRemove(storage->find(query.type, names)); else - access_control.remove(access_control.getIDs(query.type, names)); + storage->remove(storage->getIDs(query.type, names)); }; if (query.type == AccessEntityType::ROW_POLICY) - do_drop(query.row_policy_names->toStrings()); + do_drop(query.row_policy_names->toStrings(), query.storage_name); else - do_drop(query.names); + do_drop(query.names, query.storage_name); return {}; } diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 77474d68795..1a8268b9b1b 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -139,7 +139,7 @@ namespace /// For example, to execute /// GRANT ALL ON mydb.* TO role1 /// REVOKE ALL ON *.* FROM role1 - /// the current user needs to have grants only on the 'mydb' database. + /// the current user needs to have the grants only on the 'mydb' database. AccessRights all_granted_access; for (const auto & id : grantees_from_query) { diff --git a/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..49e90783a59 --- /dev/null +++ b/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int ACCESS_ENTITY_NOT_FOUND; +} + + +BlockIO InterpreterMoveAccessEntityQuery::execute() +{ + auto & query = query_ptr->as(); + auto & access_control = getContext()->getAccessControl(); + getContext()->checkAccess(getRequiredAccess()); + + if (!query.cluster.empty()) + return executeDDLQueryOnCluster(query_ptr, getContext()); + + query.replaceEmptyDatabase(getContext()->getCurrentDatabase()); + + std::vector ids; + if (query.type == AccessEntityType::ROW_POLICY) + ids = access_control.getIDs(query.type, query.row_policy_names->toStrings()); + else + ids = access_control.getIDs(query.type, query.names); + + /// Validate that all entities are from the same storage. 
+ const auto source_storage = access_control.findStorage(ids.front()); + if (!source_storage->exists(ids)) + throw Exception(ErrorCodes::ACCESS_ENTITY_NOT_FOUND, "All access entities must be from the same storage in order to be moved"); + + access_control.moveAccessEntities(ids, source_storage->getStorageName(), query.storage_name); + return {}; +} + + +AccessRightsElements InterpreterMoveAccessEntityQuery::getRequiredAccess() const +{ + const auto & query = query_ptr->as(); + AccessRightsElements res; + switch (query.type) + { + case AccessEntityType::USER: + { + res.emplace_back(AccessType::DROP_USER); + res.emplace_back(AccessType::CREATE_USER); + return res; + } + case AccessEntityType::ROLE: + { + res.emplace_back(AccessType::DROP_ROLE); + res.emplace_back(AccessType::CREATE_ROLE); + return res; + } + case AccessEntityType::SETTINGS_PROFILE: + { + res.emplace_back(AccessType::DROP_SETTINGS_PROFILE); + res.emplace_back(AccessType::CREATE_SETTINGS_PROFILE); + return res; + } + case AccessEntityType::ROW_POLICY: + { + if (query.row_policy_names) + { + for (const auto & row_policy_name : query.row_policy_names->full_names) + { + res.emplace_back(AccessType::DROP_ROW_POLICY, row_policy_name.database, row_policy_name.table_name); + res.emplace_back(AccessType::CREATE_ROW_POLICY, row_policy_name.database, row_policy_name.table_name); + } + } + return res; + } + case AccessEntityType::QUOTA: + { + res.emplace_back(AccessType::DROP_QUOTA); + res.emplace_back(AccessType::CREATE_QUOTA); + return res; + } + case AccessEntityType::MAX: + break; + } + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by DROP query", toString(query.type)); +} + +} diff --git a/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.h b/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.h new file mode 100644 index 00000000000..1a70bf7c065 --- /dev/null +++ b/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class AccessRightsElements; + +class InterpreterMoveAccessEntityQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterMoveAccessEntityQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + +private: + AccessRightsElements getRequiredAccess() const; + + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp index 7b57b8803cd..d7480d45524 100644 --- a/src/Interpreters/ActionLocksManager.cpp +++ b/src/Interpreters/ActionLocksManager.cpp @@ -16,6 +16,7 @@ namespace ActionLocks extern const StorageActionBlockType DistributedSend = 5; extern const StorageActionBlockType PartsTTLMerge = 6; extern const StorageActionBlockType PartsMove = 7; + extern const StorageActionBlockType PullReplicationLog = 8; } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 232721666e7..337d948471f 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2191,8 +2191,8 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( /// Replace predicate result to constant 1. 
Node node; node.type = ActionType::COLUMN; - node.result_name = std::move(predicate->result_name); - node.result_type = std::move(predicate->result_type); + node.result_name = predicate->result_name; + node.result_type = predicate->result_type; node.column = node.result_type->createColumnConst(0, 1); if (predicate->type != ActionType::INPUT) @@ -2506,6 +2506,96 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( return result_dag; } +ActionsDAG::NodeRawConstPtrs ActionsDAG::extractConjunctionAtoms(const Node * predicate) +{ + NodeRawConstPtrs atoms; + + std::stack stack; + stack.push(predicate); + + while (!stack.empty()) + { + const auto * node = stack.top(); + stack.pop(); + if (node->type == ActionsDAG::ActionType::FUNCTION) + { + const auto & name = node->function_base->getName(); + if (name == "and") + { + for (const auto * arg : node->children) + stack.push(arg); + + continue; + } + } + + atoms.push_back(node); + } + + return atoms; +} + +ActionsDAG::NodeRawConstPtrs ActionsDAG::filterNodesByAllowedInputs( + NodeRawConstPtrs nodes, + const std::unordered_set & allowed_inputs) +{ + size_t result_size = 0; + + std::unordered_map can_compute; + struct Frame + { + const ActionsDAG::Node * node; + size_t next_child_to_visit = 0; + bool can_compute_all_childern = true; + }; + + std::stack stack; + + for (const auto * node : nodes) + { + if (!can_compute.contains(node)) + stack.push({node}); + + while (!stack.empty()) + { + auto & frame = stack.top(); + bool need_visit_child = false; + while (frame.next_child_to_visit < frame.node->children.size()) + { + auto it = can_compute.find(frame.node->children[frame.next_child_to_visit]); + if (it == can_compute.end()) + { + stack.push({frame.node->children[frame.next_child_to_visit]}); + need_visit_child = true; + break; + } + + frame.can_compute_all_childern &= it->second; + ++frame.next_child_to_visit; + } + + if (need_visit_child) + continue; + + if (frame.node->type == ActionsDAG::ActionType::INPUT) + can_compute[frame.node] = allowed_inputs.contains(frame.node); + else + can_compute[frame.node] = frame.can_compute_all_childern; + + stack.pop(); + } + + if (can_compute.at(node)) + { + nodes[result_size] = node; + ++result_size; + } + } + + nodes.resize(result_size); + return nodes; +} + FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr & actions_) :actions(actions_) { diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 51edecf2bd6..3431daf99f2 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -384,6 +384,16 @@ public: const ContextPtr & context, bool single_output_condition_node = true); + /// Check if `predicate` is a combination of AND functions. + /// Returns a list of nodes representing atomic predicates. + static NodeRawConstPtrs extractConjunctionAtoms(const Node * predicate); + + /// Get a list of nodes. For every node, check if it can be compused using allowed subset of inputs. + /// Returns only those nodes from the list which can be computed. 
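Aside: the `filterNodesByAllowedInputs` declaration above reduces to a simple criterion — a node is usable iff it is one of the allowed input columns, or all of its children are usable. A toy, self-contained sketch of that criterion with illustrative types (the real implementation walks the DAG iteratively with an explicit stack and memoizes the answer per node; this recursive version only shows the rule):

```cpp
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

/// Toy stand-in for an expression DAG node (not the real ActionsDAG::Node).
struct ToyNode
{
    std::string name;
    bool is_input = false;
    std::vector<const ToyNode *> children;
};

/// A node can be computed iff it is an allowed input column
/// or every one of its children can be computed.
bool isComputable(const ToyNode * node, const std::unordered_set<const ToyNode *> & allowed_inputs)
{
    if (node->is_input)
        return allowed_inputs.contains(node);
    for (const auto * child : node->children)
        if (!isComputable(child, allowed_inputs))
            return false;
    return true;
}

int main()
{
    ToyNode a{"a", true, {}};
    ToyNode b{"b", true, {}};
    ToyNode pred_a{"equals(a, 1)", false, {&a}};
    ToyNode pred_b{"equals(b, 2)", false, {&b}};

    std::unordered_set<const ToyNode *> allowed = {&a}; /// only column `a` is available at this step
    std::cout << isComputable(&pred_a, allowed) << ' '   /// 1
              << isComputable(&pred_b, allowed) << '\n'; /// 0
}
```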
+ static NodeRawConstPtrs filterNodesByAllowedInputs( + NodeRawConstPtrs nodes, + const std::unordered_set & allowed_inputs); + private: NodeRawConstPtrs getParents(const Node * target) const; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b769011e3d4..aa7ca274530 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -74,6 +74,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -93,38 +94,6 @@ static size_t getTypeDepth(const DataTypePtr & type) return 0; } -template -static bool decimalEqualsFloat(Field field, Float64 float_value) -{ - auto decimal_field = field.get>(); - auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); - return decimal_to_float == float_value; -} - -/// Applies stricter rules than convertFieldToType: -/// Doesn't allow : -/// - loss of precision converting to Decimal -static bool convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type, Field & result_value) -{ - result_value = convertFieldToType(from_value, to_type); - if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) - return applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value); - if (from_value.getType() == Field::Types::Float64 && Field::isDecimal(result_value.getType())) - { - /// Convert back to Float64 and compare - if (result_value.getType() == Field::Types::Decimal32) - return decimalEqualsFloat(result_value, from_value.get()); - if (result_value.getType() == Field::Types::Decimal64) - return decimalEqualsFloat(result_value, from_value.get()); - if (result_value.getType() == Field::Types::Decimal128) - return decimalEqualsFloat(result_value, from_value.get()); - if (result_value.getType() == Field::Types::Decimal256) - return decimalEqualsFloat(result_value, from_value.get()); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); - } - return true; -} - /// The `convertFieldToTypeStrict` is used to prevent unexpected results in case of conversion with loss of precision. /// Example: `SELECT 33.3 :: Decimal(9, 1) AS a WHERE a IN (33.33 :: Decimal(9, 2))` /// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty. 
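Aside: the comment above is the heart of `convertFieldToTypeStrict` — a float literal is accepted as a Decimal only if converting it back loses nothing. A hedged, self-contained sketch of that round-trip check using plain `int64_t` scaling (not the real `Field`/`DecimalUtils` machinery):

```cpp
#include <cmath>
#include <cstdint>
#include <iostream>
#include <optional>

/// Illustrative sketch: convert a Float64 to a scaled decimal value, but only
/// accept the result when converting it back to Float64 reproduces the input,
/// so 33.33 is not silently treated as 33.3 for a Decimal(9, 1) column.
std::optional<int64_t> floatToDecimalStrict(double value, uint32_t scale)
{
    double multiplier = std::pow(10.0, scale);
    auto decimal = static_cast<int64_t>(std::round(value * multiplier));
    if (static_cast<double>(decimal) / multiplier != value)
        return std::nullopt; /// precision would be lost
    return decimal;
}

int main()
{
    std::cout << floatToDecimalStrict(33.3, 1).value_or(-1) << '\n';  /// 333
    std::cout << floatToDecimalStrict(33.33, 1).has_value() << '\n';  /// 0 (rejected)
}
```

A rejected conversion simply means the literal is never inserted into the set, so it cannot spuriously match column values — the behaviour the comment above describes.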
@@ -145,11 +114,10 @@ static Block createBlockFromCollection(const Collection & collection, const Data { if (columns_num == 1) { - Field field; - bool is_conversion_ok = convertFieldToTypeStrict(value, *types[0], field); + auto field = convertFieldToTypeStrict(value, *types[0]); bool need_insert_null = transform_null_in && types[0]->isNullable(); - if (is_conversion_ok && (!field.isNull() || need_insert_null)) - columns[0]->insert(field); + if (field && (!field->isNull() || need_insert_null)) + columns[0]->insert(*field); } else { @@ -170,9 +138,10 @@ static Block createBlockFromCollection(const Collection & collection, const Data size_t i = 0; for (; i < tuple_size; ++i) { - bool is_conversion_ok = convertFieldToTypeStrict(tuple[i], *types[i], tuple_values[i]); - if (!is_conversion_ok) + auto converted_field = convertFieldToTypeStrict(tuple[i], *types[i]); + if (!converted_field) break; + tuple_values[i] = std::move(*converted_field); bool need_insert_null = transform_null_in && types[i]->isNullable(); if (tuple_values[i].isNull() && !need_insert_null) @@ -376,7 +345,7 @@ Block createBlockForSet( { auto get_tuple_type_from_ast = [context](const auto & func) -> DataTypePtr { - if (func && (func->name == "tuple" || func->name == "array") && !func->arguments->children.empty()) + if ((func->name == "tuple" || func->name == "array") && !func->arguments->children.empty()) { /// Won't parse all values of outer tuple. auto element = func->arguments->children.at(0); @@ -387,6 +356,7 @@ Block createBlockForSet( return evaluateConstantExpression(func, context).second; }; + assert(right_arg); const DataTypePtr & right_arg_type = get_tuple_type_from_ast(right_arg); size_t left_tuple_depth = getTypeDepth(left_arg_type); @@ -1107,6 +1077,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints)); throw; } + + /// Normal functions are not parametric for now. + if (node.parameters) + throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", node.name); } Names argument_names; @@ -1210,22 +1184,16 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & else if (data.is_create_parameterized_view && query_parameter) { const auto data_type = DataTypeFactory::instance().get(query_parameter->type); - /// Use getUniqueName() to allow multiple use of query parameter in the query: - /// - /// CREATE VIEW view AS - /// SELECT * - /// FROM system.one - /// WHERE dummy = {k1:Int}+1 OR dummy = {k1:Int}+2 - /// ^^ ^^ - /// - /// NOTE: query in the VIEW will not be modified this is needed - /// only during analysis for CREATE VIEW to avoid duplicated - /// column names. 
- ColumnWithTypeAndName column(data_type, data.getUniqueName("__" + query_parameter->getColumnName())); - data.addColumn(column); + /// During analysis for CREATE VIEW of a parameterized view, if parameter is + /// used multiple times, column is only added once + if (!data.hasColumn(query_parameter->name)) + { + ColumnWithTypeAndName column(data_type, query_parameter->name); + data.addColumn(column); + } argument_types.push_back(data_type); - argument_names.push_back(column.name); + argument_names.push_back(query_parameter->name); } else { diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index 7ba9011f18b..722675f62ed 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -90,7 +90,10 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S /// Note: here we violate strict aliasing. /// It should be ok as log as we do not reffer to any value from `out` before filling. const char * source = static_cast(column)->getRawDataBegin(); - T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset); + size_t offset_to = offset; + if constexpr (std::endian::native == std::endian::big) + offset_to = sizeof(Key) - sizeof(T) - offset; + T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset_to); fillFixedBatch(num_rows, reinterpret_cast(source), dest); offset += sizeof(T); } @@ -253,15 +256,11 @@ static inline T ALWAYS_INLINE packFixed( static inline UInt128 ALWAYS_INLINE hash128( /// NOLINT size_t i, size_t keys_size, const ColumnRawPtrs & key_columns) { - UInt128 key; SipHash hash; - for (size_t j = 0; j < keys_size; ++j) key_columns[j]->updateHashWithValue(i, hash); - hash.get128(key); - - return key; + return hash.get128(); } /** Serialize keys into a continuous chunk of memory. 
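The `fillFixedBatch` change above (and the matching `AggregationMethodKeysFixed` change later in this patch) mirrors the destination byte offset on big-endian machines, so that a value packed at logical offset `o` occupies the same bits of the fixed-size key integer regardless of endianness. A minimal standalone sketch of that offset mirroring; `packAt` is a hypothetical helper, not the ClickHouse function:

```cpp
#include <bit>
#include <cstdint>
#include <cstring>
#include <iostream>

// Pack a value of type T into `key` so that it occupies the same *bits* of the
// integer key on both endiannesses: byte offset `offset` on little-endian,
// the mirrored offset sizeof(Key) - sizeof(T) - offset on big-endian.
template <typename Key, typename T>
void packAt(Key & key, const T & value, size_t offset)
{
    size_t dest = offset;
    if constexpr (std::endian::native == std::endian::big)
        dest = sizeof(Key) - sizeof(T) - offset;
    std::memcpy(reinterpret_cast<char *>(&key) + dest, &value, sizeof(T));
}

int main()
{
    // Two fixed-size key columns, UInt32 and UInt16, packed into a UInt64 key.
    uint64_t key = 0;
    packAt(key, uint32_t{0x11223344}, 0);            // bits 0..31
    packAt(key, uint16_t{0xAABB}, sizeof(uint32_t)); // bits 32..47

    // Prints 0xaabb11223344 on both little- and big-endian machines.
    std::cout << std::hex << key << '\n';
}
```

Without the mirroring, a big-endian build would place the first key column in the high bytes of the integer, producing different hashes and different unpacking offsets than the little-endian code path expects.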
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c7d4b87694b..23ee097ebff 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -175,7 +175,7 @@ public: private: CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &) { - if (!hash_table_stats || hash_table_stats->maxSize() != params.max_entries_for_hash_table_stats) + if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); return hash_table_stats; } @@ -694,8 +694,7 @@ void Aggregator::compileAggregateFunctionsIfNeeded() SipHash aggregate_functions_description_hash; aggregate_functions_description_hash.update(functions_description); - UInt128 aggregate_functions_description_hash_key; - aggregate_functions_description_hash.get128(aggregate_functions_description_hash_key); + const auto aggregate_functions_description_hash_key = aggregate_functions_description_hash.get128(); { std::lock_guard lock(mutex); @@ -984,6 +983,8 @@ void Aggregator::executeOnBlockSmall( } executeImpl(result, row_begin, row_end, key_columns, aggregate_instructions); + + CurrentMemoryTracker::check(); } void Aggregator::mergeOnBlockSmall( @@ -1023,6 +1024,8 @@ void Aggregator::mergeOnBlockSmall( #undef M else throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); + + CurrentMemoryTracker::check(); } void Aggregator::executeImpl( @@ -1383,11 +1386,8 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( } -void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( - AggregatedDataVariants & data_variants, - size_t row_begin, - size_t row_end, - AggregateFunctionInstruction * aggregate_instructions) const +void NO_INLINE Aggregator::executeOnIntervalWithoutKey( + AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions) const { /// `data_variants` will destroy the states of aggregate functions in the destructor data_variants.aggregator = this; @@ -1414,7 +1414,7 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( } } -void NO_INLINE Aggregator::mergeOnIntervalWithoutKeyImpl( +void NO_INLINE Aggregator::mergeOnIntervalWithoutKey( AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, @@ -2020,7 +2020,8 @@ template NO_INLINE Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const { - const size_t max_block_size = params.max_block_size; + /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated + const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1; const bool final = true; ConvertToBlockRes res; @@ -2097,7 +2098,8 @@ template Aggregator::ConvertToBlockRes NO_INLINE Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const { - const size_t max_block_size = params.max_block_size; + /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated + const size_t max_block_size = (return_single_block ? 
data.size() : std::min(params.max_block_size, data.size())) + 1; const bool final = false; ConvertToBlockRes res; @@ -2270,6 +2272,29 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va return block; } +Block Aggregator::prepareBlockAndFillWithoutKeySnapshot(AggregatedDataVariants & data_variants) const +{ + size_t rows = 1; + bool final = true; + + auto && out_cols + = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), data_variants.aggregates_pools, final, rows); + auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; + + AggregatedDataWithoutKey & data = data_variants.without_key; + + /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. + for (size_t insert_i = 0; insert_i < params.aggregates_size; ++insert_i) + aggregate_functions[insert_i]->insertResultInto( + data + offsets_of_aggregate_states[insert_i], + *final_aggregate_columns[insert_i], + data_variants.aggregates_pool); + + Block block = finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows); + + return block; +} + template Aggregator::ConvertToBlockRes Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const @@ -2477,48 +2502,21 @@ void NO_INLINE Aggregator::mergeDataNullKey( } } - template void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const { if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization) mergeDataNullKey(table_dst, table_src, arena); + PaddedPODArray dst_places; + PaddedPODArray src_places; + auto merge = [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted) { if (!inserted) { -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - compiled_functions.merge_aggregate_states_function(dst, src); - - if (compiled_aggregate_functions_holder->compiled_aggregate_functions.functions_count != params.aggregates_size) - { - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!is_aggregate_function_compiled[i]) - aggregate_functions[i]->merge( - dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); - } - - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!is_aggregate_function_compiled[i]) - aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]); - } - } - } - else -#endif - { - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]); - } + dst_places.push_back(dst); + src_places.push_back(src); } else { @@ -2529,8 +2527,30 @@ void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, A }; table_src.template mergeToViaEmplace(table_dst, std::move(merge)); - table_src.clearAndShrink(); + +#if USE_EMBEDDED_COMPILER + if constexpr (use_compiled_functions) + { + const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; + compiled_functions.merge_aggregate_states_function(dst_places.data(), src_places.data(), dst_places.size()); + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if 
(!is_aggregate_function_compiled[i]) + aggregate_functions[i]->mergeAndDestroyBatch( + dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena); + } + + return; + } +#endif + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + aggregate_functions[i]->mergeAndDestroyBatch( + dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena); + } } @@ -2601,6 +2621,20 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( AggregatedDataVariantsPtr & res = non_empty_data[0]; + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if (aggregate_functions[i]->isParallelizeMergePrepareNeeded()) + { + size_t size = non_empty_data.size(); + std::vector data_vec; + + for (size_t result_num = 0; result_num < size; ++result_num) + data_vec.emplace_back(non_empty_data[result_num]->without_key + offsets_of_aggregate_states[i]); + + aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool); + } + } + /// We merge all aggregation results to the first. for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { @@ -2905,6 +2939,7 @@ void NO_INLINE Aggregator::mergeBlockWithoutKeyStreamsImpl( AggregateColumnsConstData aggregate_columns = params.makeAggregateColumnsData(block); mergeWithoutKeyStreamsImpl(result, 0, block.rows(), aggregate_columns); } + void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( AggregatedDataVariants & result, size_t row_begin, @@ -3123,6 +3158,8 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari LOG_TRACE(log, "Merged partially aggregated single-level data."); } + + CurrentMemoryTracker::check(); } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 05b34e8460f..30eebfba4ed 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -292,7 +292,7 @@ struct AggregationMethodStringNoCache { } - using State = ColumnsHashing::HashMethodString; + using State = ColumnsHashing::HashMethodString; static const bool low_cardinality_optimization = false; static const bool one_key_nullable_optimization = nullable; @@ -512,7 +512,10 @@ struct AggregationMethodKeysFixed else { size_t size = key_sizes[i]; - observed_column->insertData(reinterpret_cast(&key) + pos, size); + size_t offset_to = pos; + if constexpr (std::endian::native == std::endian::big) + offset_to = sizeof(Key) - size - pos; + observed_column->insertData(reinterpret_cast(&key) + offset_to, size); pos += size; } } @@ -1118,9 +1121,55 @@ public: AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block bool & no_more_keys) const; + /** This array serves two purposes. + * + * Function arguments are collected side by side, and they do not need to be collected from different places. Also the array is made zero-terminated. + * The inner loop (for the case without_key) is almost twice as compact; performance gain of about 30%. 
+ */ + struct AggregateFunctionInstruction + { + const IAggregateFunction * that{}; + size_t state_offset{}; + const IColumn ** arguments{}; + const IAggregateFunction * batch_that{}; + const IColumn ** batch_arguments{}; + const UInt64 * offsets{}; + bool has_sparse_arguments = false; + }; + + /// Used for optimize_aggregation_in_order: + /// - No two-level aggregation + /// - No external aggregation + /// - No without_key support (it is implemented using executeOnIntervalWithoutKey()) + void executeOnBlockSmall( + AggregatedDataVariants & result, + size_t row_begin, + size_t row_end, + ColumnRawPtrs & key_columns, + AggregateFunctionInstruction * aggregate_instructions) const; + + void executeOnIntervalWithoutKey( + AggregatedDataVariants & data_variants, + size_t row_begin, + size_t row_end, + AggregateFunctionInstruction * aggregate_instructions) const; + /// Used for aggregate projection. bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const; + void mergeOnBlockSmall( + AggregatedDataVariants & result, + size_t row_begin, + size_t row_end, + const AggregateColumnsConstData & aggregate_columns_data, + const ColumnRawPtrs & key_columns) const; + + void mergeOnIntervalWithoutKey( + AggregatedDataVariants & data_variants, + size_t row_begin, + size_t row_end, + const AggregateColumnsConstData & aggregate_columns_data) const; + /** Convert the aggregation data structure into a block. * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block. * @@ -1164,6 +1213,7 @@ private: friend class ConvertingAggregatedToChunksSource; friend class ConvertingAggregatedToChunksWithMergingSource; friend class AggregatingInOrderTransform; + friend class AggregatingPartialResultTransform; /// Data structure of source blocks. Block header; @@ -1178,22 +1228,6 @@ private: AggregateFunctionsPlainPtrs aggregate_functions; - /** This array serves two purposes. - * - * Function arguments are collected side by side, and they do not need to be collected from different places. Also the array is made zero-terminated. - * The inner loop (for the case without_key) is almost twice as compact; performance gain of about 30%. 
- */ - struct AggregateFunctionInstruction - { - const IAggregateFunction * that{}; - size_t state_offset{}; - const IColumn ** arguments{}; - const IAggregateFunction * batch_that{}; - const IColumn ** batch_arguments{}; - const UInt64 * offsets{}; - bool has_sparse_arguments = false; - }; - using AggregateFunctionInstructions = std::vector; using NestedColumnsHolder = std::vector>; @@ -1239,26 +1273,6 @@ private: */ void destroyAllAggregateStates(AggregatedDataVariants & result) const; - - /// Used for optimize_aggregation_in_order: - /// - No two-level aggregation - /// - No external aggregation - /// - No without_key support (it is implemented using executeOnIntervalWithoutKeyImpl()) - void executeOnBlockSmall( - AggregatedDataVariants & result, - size_t row_begin, - size_t row_end, - ColumnRawPtrs & key_columns, - AggregateFunctionInstruction * aggregate_instructions) const; - void mergeOnBlockSmall( - AggregatedDataVariants & result, - size_t row_begin, - size_t row_end, - const AggregateColumnsConstData & aggregate_columns_data, - const ColumnRawPtrs & key_columns) const; - - void mergeOnBlockImpl(Block block, AggregatedDataVariants & result, bool no_more_keys) const; - void executeImpl( AggregatedDataVariants & result, size_t row_begin, @@ -1300,17 +1314,6 @@ private: AggregateFunctionInstruction * aggregate_instructions, Arena * arena) const; - void executeOnIntervalWithoutKeyImpl( - AggregatedDataVariants & data_variants, - size_t row_begin, - size_t row_end, - AggregateFunctionInstruction * aggregate_instructions) const; - void mergeOnIntervalWithoutKeyImpl( - AggregatedDataVariants & data_variants, - size_t row_begin, - size_t row_end, - const AggregateColumnsConstData & aggregate_columns_data) const; - template void writeToTemporaryFileImpl( AggregatedDataVariants & data_variants, @@ -1392,6 +1395,7 @@ private: std::atomic * is_cancelled = nullptr) const; Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; + Block prepareBlockAndFillWithoutKeySnapshot(AggregatedDataVariants & data_variants) const; BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; template diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 8d0f18cc305..9e20ef803b3 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -105,9 +105,7 @@ UInt128 AsynchronousInsertQueue::InsertQuery::calculateHash() const applyVisitor(FieldVisitorHash(siphash), setting.getValue()); } - UInt128 res; - siphash.get128(res); - return res; + return siphash.get128(); } bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other) const @@ -149,9 +147,10 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep } } -AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_) +AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_) : WithContext(context_) , pool_size(pool_size_) + , flush_on_shutdown(flush_on_shutdown_) , queue_shards(pool_size) , pool(CurrentMetrics::AsynchronousInsertThreads, CurrentMetrics::AsynchronousInsertThreadsActive, pool_size) { @@ -164,8 +163,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo AsynchronousInsertQueue::~AsynchronousInsertQueue() { - /// TODO: add a setting for graceful shutdown. 
- LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); shutdown = true; @@ -177,17 +174,18 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() assert(dump_by_first_update_threads[i].joinable()); dump_by_first_update_threads[i].join(); + if (flush_on_shutdown) + { + for (auto & [_, elem] : shard.queue) + scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext()); + } + else { - std::lock_guard lock(shard.mutex); for (auto & [_, elem] : shard.queue) - { for (const auto & entry : elem.data->entries) - { entry->finish(std::make_exception_ptr(Exception( ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)"))); - } - } } } @@ -232,7 +230,10 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) /// to avoid buffering of huge amount of data in memory. auto read_buf = getReadBufferFromASTInsertQuery(query); - LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {}); + + LimitReadBuffer limit_buf( + *read_buf, settings.async_insert_max_data_size, + /*throw_exception=*/ false, /*exact_limit=*/ {}); WriteBufferFromString write_buf(bytes); copyData(limit_buf, write_buf); @@ -250,6 +251,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) return PushResult { .status = PushResult::TOO_MUCH_DATA, + .future = {}, .insert_data_buffer = std::make_unique(std::move(buffers)), }; } @@ -284,18 +286,19 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) assert(data); data->size_in_bytes += entry_data_size; - ++data->query_number; data->entries.emplace_back(entry); insert_future = entry->getFuture(); LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'", data->entries.size(), data->size_in_bytes, key.query_str); + bool has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size; + bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate; + /// Here we check whether we hit the limit on maximum data size in the buffer. /// And use setting from query context. /// It works, because queries with the same set of settings are already grouped together. - if (data->size_in_bytes >= key.settings.async_insert_max_data_size - || (data->query_number >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate)) + if (!flush_stopped && (has_enough_bytes || has_enough_queries)) { data_to_process = std::move(data); shard.iterators.erase(it); @@ -316,9 +319,55 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) { .status = PushResult::OK, .future = std::move(insert_future), + .insert_data_buffer = nullptr, }; } +void AsynchronousInsertQueue::flushAll() +{ + std::lock_guard flush_lock(flush_mutex); + + LOG_DEBUG(log, "Requested to flush asynchronous insert queue"); + + /// Disable background flushes to avoid adding new elements to the queue. 
+ flush_stopped = true; + std::vector queues_to_flush(pool_size); + + for (size_t i = 0; i < pool_size; ++i) + { + std::lock_guard lock(queue_shards[i].mutex); + queues_to_flush[i] = std::move(queue_shards[i].queue); + queue_shards[i].iterators.clear(); + } + + size_t total_queries = 0; + size_t total_bytes = 0; + size_t total_entries = 0; + + for (auto & queue : queues_to_flush) + { + total_queries += queue.size(); + for (auto & [_, entry] : queue) + { + total_bytes += entry.data->size_in_bytes; + total_entries += entry.data->entries.size(); + scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext()); + } + } + + /// Note that jobs scheduled before the call of 'flushAll' are not counted here. + LOG_DEBUG(log, + "Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)", + pool.active(), total_entries, total_bytes, total_queries); + + /// Wait until all jobs are finished. That includes also jobs + /// that were scheduled before the call of 'flushAll'. + pool.wait(); + + LOG_DEBUG(log, "Finished flushing of asynchronous insert queue"); + flush_stopped = false; +} + void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) { auto & shard = queue_shards[shard_num]; @@ -344,6 +393,9 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) if (shutdown) return; + if (flush_stopped) + continue; + const auto now = std::chrono::steady_clock::now(); while (true) @@ -386,7 +438,7 @@ try elem.flush_query_id = flush_query_id; elem.exception = flush_exception; elem.status = flush_exception.empty() ? Status::Ok : Status::FlushError; - log.add(elem); + log.add(std::move(elem)); } } catch (...) @@ -408,7 +460,6 @@ try const auto * log = &Poco::Logger::get("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); auto insert_context = Context::createCopy(global_context); - DB::CurrentThread::QueryScope query_scope_holder(insert_context); bool internal = false; // To enable logging this query bool async_insert = true; @@ -430,6 +481,9 @@ try insert_context->setInitialQueryStartTime(query_start_time); insert_context->setCurrentQueryId(insert_query_id); insert_context->setInitialQueryId(insert_query_id); + + DB::CurrentThread::QueryScope query_scope_holder(insert_context); + size_t log_queries_cut_to_length = insert_context->getSettingsRef().log_queries_cut_to_length; String query_for_logging = insert_query.hasSecretParts() ? insert_query.formatForLogging(log_queries_cut_to_length) @@ -556,7 +610,7 @@ try if (!elem.exception.empty()) { elem.status = AsynchronousInsertLogElement::ParsingError; - insert_log->add(elem); + insert_log->add(std::move(elem)); } else { @@ -605,7 +659,7 @@ try total_rows, total_bytes, key.query_str); bool pulling_pipeline = false; - logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, internal); + logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); } catch (...) 
{ diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 8530a453cd6..577752af45a 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -19,7 +19,7 @@ class AsynchronousInsertQueue : public WithContext public: using Milliseconds = std::chrono::milliseconds; - AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_); + AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_); ~AsynchronousInsertQueue(); struct PushResult @@ -40,6 +40,8 @@ public: std::unique_ptr insert_data_buffer; }; + /// Force flush the whole queue. + void flushAll(); PushResult push(ASTPtr query, ContextPtr query_context); size_t getPoolSize() const { return pool_size; } @@ -100,9 +102,7 @@ private: using EntryPtr = std::shared_ptr; std::list entries; - size_t size_in_bytes = 0; - size_t query_number = 0; }; using InsertDataPtr = std::unique_ptr; @@ -130,6 +130,8 @@ private: }; const size_t pool_size; + const bool flush_on_shutdown; + std::vector queue_shards; /// Logic and events behind queue are as follows: @@ -141,6 +143,10 @@ private: /// (async_insert_max_data_size setting). If so, then again we dump the data. std::atomic shutdown{false}; + std::atomic flush_stopped{false}; + + /// A mutex that prevents concurrent forced flushes of queue. + mutable std::mutex flush_mutex; /// Dump the data only inside this pool. ThreadPool pool; diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp new file mode 100644 index 00000000000..5e6c038ac5d --- /dev/null +++ b/src/Interpreters/BackupLog.cpp @@ -0,0 +1,61 @@ +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +BackupLogElement::BackupLogElement(BackupOperationInfo info_) + : event_time(std::chrono::system_clock::now()) + , event_time_usec(timeInMicroseconds(event_time)) + , info(std::move(info_)) +{ +} + +NamesAndTypesList BackupLogElement::getNamesAndTypes() +{ + return + { + {"event_date", std::make_shared()}, + {"event_time_microseconds", std::make_shared(6)}, + {"id", std::make_shared()}, + {"name", std::make_shared()}, + {"status", std::make_shared(getBackupStatusEnumValues())}, + {"error", std::make_shared()}, + {"start_time", std::make_shared()}, + {"end_time", std::make_shared()}, + {"num_files", std::make_shared()}, + {"total_size", std::make_shared()}, + {"num_entries", std::make_shared()}, + {"uncompressed_size", std::make_shared()}, + {"compressed_size", std::make_shared()}, + {"files_read", std::make_shared()}, + {"bytes_read", std::make_shared()}, + }; +} + +void BackupLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t i = 0; + columns[i++]->insert(DateLUT::instance().toDayNum(std::chrono::system_clock::to_time_t(event_time)).toUnderType()); + columns[i++]->insert(event_time_usec); + columns[i++]->insert(info.id); + columns[i++]->insert(info.name); + columns[i++]->insert(static_cast(info.status)); + columns[i++]->insert(info.error_message); + columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); + columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(info.end_time))); + columns[i++]->insert(info.num_files); + columns[i++]->insert(info.total_size); + columns[i++]->insert(info.num_entries); + columns[i++]->insert(info.uncompressed_size); + columns[i++]->insert(info.compressed_size); + columns[i++]->insert(info.num_read_files); + columns[i++]->insert(info.num_read_bytes); +} + +} diff --git 
a/src/Interpreters/BackupLog.h b/src/Interpreters/BackupLog.h new file mode 100644 index 00000000000..283b74f68ba --- /dev/null +++ b/src/Interpreters/BackupLog.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +/** A struct which will be inserted as row into backup_log table. + * Contains a record about backup or restore operation. + */ +struct BackupLogElement +{ + BackupLogElement() = default; + BackupLogElement(BackupOperationInfo info_); + BackupLogElement(const BackupLogElement &) = default; + BackupLogElement & operator=(const BackupLogElement &) = default; + BackupLogElement(BackupLogElement &&) = default; + BackupLogElement & operator=(BackupLogElement &&) = default; + + std::chrono::system_clock::time_point event_time{}; + Decimal64 event_time_usec{}; + BackupOperationInfo info{}; + + static std::string name() { return "BackupLog"; } + static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases() { return {}; } + void appendToBlock(MutableColumns & columns) const; + static const char * getCustomColumnList() { return nullptr; } +}; + +class BackupLog : public SystemLog +{ + using SystemLog::SystemLog; + +public: + static const char * getDefaultOrderBy() { return "event_date, event_time_microseconds"; } +}; + +} diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 42cc7b80a66..01ee788cc74 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include @@ -21,8 +19,11 @@ namespace fs = std::filesystem; namespace ProfileEvents { + extern const Event FilesystemCacheLoadMetadataMicroseconds; extern const Event FilesystemCacheEvictedBytes; extern const Event FilesystemCacheEvictedFileSegments; + extern const Event FilesystemCacheEvictionSkippedFileSegments; + extern const Event FilesystemCacheEvictionTries; extern const Event FilesystemCacheLockCacheMicroseconds; extern const Event FilesystemCacheReserveMicroseconds; extern const Event FilesystemCacheEvictMicroseconds; @@ -42,6 +43,7 @@ size_t roundUpToMultiple(size_t num, size_t multiple) { return roundDownToMultiple(num + multiple - 1, multiple); } + } namespace DB @@ -51,13 +53,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -FileCache::FileCache(const FileCacheSettings & settings) +FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & settings) : max_file_segment_size(settings.max_file_segment_size) - , bypass_cache_threshold(settings.enable_bypass_cache_with_threashold ? settings.bypass_cache_threashold : 0) - , delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms) + , bypass_cache_threshold(settings.enable_bypass_cache_with_threshold ? 
settings.bypass_cache_threshold : 0) , boundary_alignment(settings.boundary_alignment) , background_download_threads(settings.background_download_threads) - , log(&Poco::Logger::get("FileCache")) + , metadata_download_threads(settings.load_metadata_threads) + , log(&Poco::Logger::get("FileCache(" + cache_name + ")")) , metadata(settings.base_path) { main_priority = std::make_unique(settings.max_size, settings.max_elements); @@ -134,9 +136,7 @@ void FileCache::initialize() for (size_t i = 0; i < background_download_threads; ++i) download_threads.emplace_back([this] { metadata.downloadThreadFunc(); }); - cleanup_task = Context::getGlobalContextInstance()->getSchedulePool().createTask("FileCacheCleanup", [this]{ cleanupThreadFunc(); }); - cleanup_task->activate(); - cleanup_task->scheduleAfter(delayed_cleanup_interval_ms); + cleanup_thread = std::make_unique(std::function{ [this]{ metadata.cleanupThreadFunc(); }}); } CacheGuard::Lock FileCache::lockCache() const @@ -168,41 +168,6 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: if (!file_segment_metadata.evicting()) { file_segment = file_segment_metadata.file_segment; - if (file_segment->isDownloaded()) - { - if (file_segment->getDownloadedSize(true) == 0) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot have zero size downloaded file segments. {}", - file_segment->getInfoForLog()); - } - -#ifndef NDEBUG - /** - * Check that in-memory state of the cache is consistent with the state on disk. - * Check only in debug build, because such checks can be done often and can be quite - * expensive compared to overall query execution time. - */ - - fs::path path = file_segment->getPathInLocalCache(); - if (!fs::exists(path)) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "File path does not exist, but file has DOWNLOADED state. {}", - file_segment->getInfoForLog()); - } - - if (fs::file_size(path) == 0) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot have zero size downloaded file segments. {}", - file_segment->getInfoForLog()); - } -#endif - } } else { @@ -554,40 +519,21 @@ KeyMetadata::iterator FileCache::addFileSegment( result_state = state; } - PriorityIterator cache_it; - if (state == FileSegment::State::DOWNLOADED) + auto file_segment = std::make_shared(key, offset, size, result_state, settings, this, locked_key.getKeyMetadata()); + auto file_segment_metadata = std::make_shared(std::move(file_segment)); + + auto [file_segment_metadata_it, inserted] = locked_key.getKeyMetadata()->emplace(offset, file_segment_metadata); + if (!inserted) { - cache_it = main_priority->add(locked_key.getKeyMetadata(), offset, size, *lock); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Failed to insert {}:{}: entry already exists", key, offset); } - try - { - auto file_segment = std::make_shared( - key, offset, size, result_state, settings, this, locked_key.getKeyMetadata(), cache_it); - auto file_segment_metadata = std::make_shared(std::move(file_segment)); - - auto [file_segment_metadata_it, inserted] = locked_key.getKeyMetadata()->emplace(offset, file_segment_metadata); - if (!inserted) - { - if (cache_it) - cache_it->remove(*lock); - - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Failed to insert {}:{}: entry already exists", key, offset); - } - - return file_segment_metadata_it; - } - catch (...) 
- { - if (cache_it) - cache_it->remove(*lock); - throw; - } + return file_segment_metadata_it; } -bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) +bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCacheReserveStat & reserve_stat) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); @@ -653,6 +599,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) { chassert(segment_metadata->file_segment->assertCorrectness()); + auto & stat_by_kind = reserve_stat.stat_by_kind[segment_metadata->file_segment->getKind()]; if (segment_metadata->releasable()) { const auto & key = segment_metadata->file_segment->key(); @@ -661,9 +608,20 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) it = to_delete.emplace(key, locked_key.getKeyMetadata()).first; it->second.add(segment_metadata); + stat_by_kind.releasable_size += segment_metadata->size(); + ++stat_by_kind.releasable_count; + freeable_space += segment_metadata->size(); ++freeable_count; } + else + { + stat_by_kind.non_releasable_size += segment_metadata->size(); + ++stat_by_kind.non_releasable_count; + + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionSkippedFileSegments); + } + return PriorityIterationResult::CONTINUE; }; @@ -677,6 +635,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) if (is_query_priority_overflow()) { + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries); + query_priority->iterate( [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) { return is_query_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; }, @@ -718,8 +678,14 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) return is_overflow; }; + /// If we have enough space in query_priority, we are not interested about stat there anymore. + /// Clean the stat before iterating main_priority to avoid calculating any segment stat twice. + reserve_stat.stat_by_kind.clear(); + if (is_main_priority_overflow()) { + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries); + main_priority->iterate( [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) { return is_main_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; }, @@ -806,19 +772,23 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) return true; } +void FileCache::removeKey(const Key & key) +{ + assertInitialized(); + metadata.removeKey(key, /* if_exists */false, /* if_releasable */true); +} + void FileCache::removeKeyIfExists(const Key & key) { assertInitialized(); + metadata.removeKey(key, /* if_exists */true, /* if_releasable */true); +} - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL); - if (!locked_key) - return; - - /// In ordinary case we remove data from cache when it's not used by anyone. - /// But if we have multiple replicated zero-copy tables on the same server - /// it became possible to start removing something from cache when it is used - /// by other "zero-copy" tables. That is why it's not an error. 
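The `tryReserve` changes in this hunk fill a `FileCacheReserveStat` while scanning eviction candidates, so a failed reservation can report how much of the scanned data was releasable per file segment kind. A simplified standalone sketch of that accounting; `Segment` and `KindStat` are illustrative stand-ins, not the ClickHouse metadata types:

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical stand-ins for file segment metadata and per-kind statistics.
struct Segment { std::string kind; size_t size; bool releasable; };

struct KindStat
{
    size_t releasable_size = 0, releasable_count = 0;
    size_t non_releasable_size = 0, non_releasable_count = 0;
};

// Walk eviction candidates once, accumulating per-kind statistics so that a
// failed reservation can explain why nothing (or not enough) could be evicted.
bool tryReserve(const std::vector<Segment> & candidates, size_t required,
                std::unordered_map<std::string, KindStat> & stat_by_kind)
{
    size_t freeable = 0;
    for (const auto & segment : candidates)
    {
        auto & stat = stat_by_kind[segment.kind];
        if (segment.releasable)
        {
            stat.releasable_size += segment.size;
            ++stat.releasable_count;
            freeable += segment.size;
        }
        else
        {
            stat.non_releasable_size += segment.size;
            ++stat.non_releasable_count;
        }
    }
    return freeable >= required;
}

int main()
{
    std::unordered_map<std::string, KindStat> stat;
    std::vector<Segment> candidates{{"Regular", 4096, true}, {"Regular", 1024, false}};

    if (!tryReserve(candidates, 8192, stat))
        for (const auto & [kind, s] : stat)
            std::cout << kind << ": releasable " << s.releasable_size
                      << " bytes, non-releasable " << s.non_releasable_size << " bytes\n";
}
```

In the real code the statistics are cleared between the query-level and main priority queues (as the comment in this hunk notes) so that no segment is counted twice.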
- locked_key->removeAllReleasable(); +void FileCache::removeFileSegment(const Key & key, size_t offset) +{ + assertInitialized(); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); + locked_key->removeFileSegment(offset); } void FileCache::removePathIfExists(const String & path) @@ -829,23 +799,12 @@ void FileCache::removePathIfExists(const String & path) void FileCache::removeAllReleasable() { assertInitialized(); - - auto lock = lockCache(); - - main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) - { - if (segment_metadata->releasable()) - { - auto file_segment = segment_metadata->file_segment; - locked_key.removeFileSegment(file_segment->offset(), file_segment->lock()); - return PriorityIterationResult::REMOVE_AND_CONTINUE; - } - return PriorityIterationResult::CONTINUE; - }, lock); + metadata.removeAllKeys(/* if_releasable */true); if (stash) { /// Remove all access information. + auto lock = lockCache(); stash->records.clear(); stash->queue->removeAll(lock); } @@ -853,13 +812,8 @@ void FileCache::removeAllReleasable() void FileCache::loadMetadata() { - auto lock = lockCache(); + ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLoadMetadataMicroseconds); - UInt64 offset = 0; - size_t size = 0; - std::vector>> queue_entries; - - /// cache_base_path / key_prefix / key / offset if (!metadata.empty()) { throw Exception( @@ -869,148 +823,236 @@ void FileCache::loadMetadata() "Please, check log for error messages"); } - size_t total_size = 0; - for (auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; key_prefix_it != fs::directory_iterator(); - key_prefix_it++) + loadMetadataImpl(); + + /// Shuffle file_segment_metadatas to have random order in LRUQueue + /// as at startup all file_segment_metadatas have the same priority. + main_priority->shuffle(lockCache()); +} + +void FileCache::loadMetadataImpl() +{ + auto get_keys_dir_to_process = [ + &, key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}, get_key_mutex = std::mutex()] + () mutable -> std::optional { - const fs::path key_prefix_directory = key_prefix_it->path(); - - if (!key_prefix_it->is_directory()) + std::lock_guard lk(get_key_mutex); + while (true) { - if (key_prefix_directory.filename() != "status") + if (key_prefix_it == fs::directory_iterator()) + return std::nullopt; + + auto path = key_prefix_it->path(); + if (key_prefix_it->is_directory()) { - LOG_WARNING( - log, "Unexpected file {} (not a directory), will skip it", - key_prefix_directory.string()); + key_prefix_it++; + return path; } + + if (key_prefix_it->path().filename() != "status") + { + LOG_WARNING(log, "Unexpected file {} (not a directory), will skip it", path.string()); + } + key_prefix_it++; + } + }; + + std::vector loading_threads; + std::exception_ptr first_exception; + std::mutex set_exception_mutex; + std::atomic stop_loading = false; + + LOG_INFO(log, "Loading filesystem cache with {} threads", metadata_download_threads); + + for (size_t i = 0; i < metadata_download_threads; ++i) + { + try + { + loading_threads.emplace_back([&] + { + while (!stop_loading) + { + try + { + auto path = get_keys_dir_to_process(); + if (!path.has_value()) + return; + + loadMetadataForKeys(path.value()); + } + catch (...) + { + { + std::lock_guard exception_lock(set_exception_mutex); + if (!first_exception) + first_exception = std::current_exception(); + } + stop_loading = true; + return; + } + } + }); + } + catch (...) 
+ { + { + std::lock_guard exception_lock(set_exception_mutex); + if (!first_exception) + first_exception = std::current_exception(); + } + stop_loading = true; + break; + } + } + + for (auto & thread : loading_threads) + if (thread.joinable()) + thread.join(); + + if (first_exception) + std::rethrow_exception(first_exception); + +#ifdef ABORT_ON_LOGICAL_ERROR + assertCacheCorrectness(); +#endif +} + +void FileCache::loadMetadataForKeys(const fs::path & keys_dir) +{ + fs::directory_iterator key_it{keys_dir}; + if (key_it == fs::directory_iterator{}) + { + LOG_DEBUG(log, "Removing empty key prefix directory: {}", keys_dir.string()); + fs::remove(keys_dir); + return; + } + + UInt64 offset = 0, size = 0; + for (; key_it != fs::directory_iterator(); key_it++) + { + const fs::path key_directory = key_it->path(); + + if (!key_it->is_directory()) + { + LOG_DEBUG( + log, + "Unexpected file: {} (not a directory). Expected a directory", + key_directory.string()); continue; } - fs::directory_iterator key_it{key_prefix_directory}; - if (key_it == fs::directory_iterator{}) + if (fs::directory_iterator{key_directory} == fs::directory_iterator{}) { - LOG_DEBUG(log, "Removing empty key prefix directory: {}", key_prefix_directory.string()); - fs::remove(key_prefix_directory); + LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string()); + fs::remove(key_directory); continue; } - for (/* key_it already initialized to verify emptiness */; key_it != fs::directory_iterator(); key_it++) + const auto key = Key::fromKeyString(key_directory.filename().string()); + auto key_metadata = metadata.getKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true); + + const size_t size_limit = main_priority->getSizeLimit(); + const size_t elements_limit = main_priority->getElementsLimit(); + + for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it) { - const fs::path key_directory = key_it->path(); + auto offset_with_suffix = offset_it->path().filename().string(); + auto delim_pos = offset_with_suffix.find('_'); + bool parsed; + FileSegmentKind segment_kind = FileSegmentKind::Regular; - if (!key_it->is_directory()) + if (delim_pos == std::string::npos) + parsed = tryParse(offset, offset_with_suffix); + else { - LOG_DEBUG( - log, - "Unexpected file: {} (not a directory). Expected a directory", - key_directory.string()); - continue; - } - - if (fs::directory_iterator{key_directory} == fs::directory_iterator{}) - { - LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string()); - fs::remove(key_directory); - continue; - } - - const auto key = Key(unhexUInt(key_directory.filename().string().data())); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true); - - for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it) - { - auto offset_with_suffix = offset_it->path().filename().string(); - auto delim_pos = offset_with_suffix.find('_'); - bool parsed; - FileSegmentKind segment_kind = FileSegmentKind::Regular; - - if (delim_pos == std::string::npos) - parsed = tryParse(offset, offset_with_suffix); - else + parsed = tryParse(offset, offset_with_suffix.substr(0, delim_pos)); + if (offset_with_suffix.substr(delim_pos+1) == "persistent") { - parsed = tryParse(offset, offset_with_suffix.substr(0, delim_pos)); - if (offset_with_suffix.substr(delim_pos+1) == "persistent") - { - /// For compatibility. 
Persistent files are no longer supported. - fs::remove(offset_it->path()); - continue; - } - if (offset_with_suffix.substr(delim_pos+1) == "temporary") - { - fs::remove(offset_it->path()); - continue; - } + /// For compatibility. Persistent files are no longer supported. + fs::remove(offset_it->path()); + continue; } - - if (!parsed) - { - LOG_WARNING(log, "Unexpected file: {}", offset_it->path().string()); - continue; /// Or just remove? Some unexpected file. - } - - size = offset_it->file_size(); - if (!size) + if (offset_with_suffix.substr(delim_pos+1) == "temporary") { fs::remove(offset_it->path()); continue; } + } - if ((main_priority->getSizeLimit() == 0 || main_priority->getSize(lock) + size <= main_priority->getSizeLimit()) - && (main_priority->getElementsLimit() == 0 || main_priority->getElementsCount(lock) + 1 <= main_priority->getElementsLimit())) + if (!parsed) + { + LOG_WARNING(log, "Unexpected file: {}", offset_it->path().string()); + continue; /// Or just remove? Some unexpected file. + } + + size = offset_it->file_size(); + if (!size) + { + fs::remove(offset_it->path()); + continue; + } + + bool limits_satisfied; + IFileCachePriority::Iterator cache_it; + { + auto lock = lockCache(); + limits_satisfied = (size_limit == 0 || main_priority->getSize(lock) + size <= size_limit) + && (elements_limit == 0 || main_priority->getElementsCount(lock) + 1 <= elements_limit); + + if (limits_satisfied) + cache_it = main_priority->add(key_metadata, offset, size, lock); + + /// TODO: we can get rid of this lockCache() if we first load everything in parallel + /// without any mutual lock between loading threads, and only after do removeOverflow(). + /// This will be better because overflow here may + /// happen only if cache configuration changed and max_size because less than it was. + } + + if (limits_satisfied) + { + bool inserted = false; + try { - KeyMetadata::iterator file_segment_metadata_it; - try - { - file_segment_metadata_it = addFileSegment( - *locked_key, offset, size, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings(segment_kind), &lock); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - chassert(false); + auto file_segment = std::make_shared(key, offset, size, + FileSegment::State::DOWNLOADED, + CreateFileSegmentSettings(segment_kind), + this, + key_metadata, + cache_it); - fs::remove(offset_it->path()); - continue; - } + inserted = key_metadata->emplace(offset, std::make_shared(std::move(file_segment))).second; + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + } - const auto & file_segment_metadata = file_segment_metadata_it->second; - chassert(file_segment_metadata->file_segment->assertCorrectness()); - total_size += size; - - queue_entries.emplace_back( - file_segment_metadata->getQueueIterator(), - file_segment_metadata->file_segment); + if (inserted) + { + LOG_TEST(log, "Added file segment {}:{} (size: {}) with path: {}", key, offset, size, offset_it->path().string()); } else { - LOG_WARNING( - log, - "Cache capacity changed (max size: {}, used: {}), " - "cached file `{}` does not fit in cache anymore (size: {})", - main_priority->getSizeLimit(), main_priority->getSize(lock), key_directory.string(), size); - + cache_it->remove(lockCache()); fs::remove(offset_it->path()); + chassert(false); } } + else + { + LOG_WARNING( + log, + "Cache capacity changed (max size: {}), " + "cached file `{}` does not fit in cache anymore (size: {})", + main_priority->getSizeLimit(), offset_it->path().string(), size); + + fs::remove(offset_it->path()); + } } - } - chassert(total_size == main_priority->getSize(lock)); - chassert(total_size <= main_priority->getSizeLimit()); - - /// Shuffle file_segment_metadatas to have random order in LRUQueue - /// as at startup all file_segment_metadatas have the same priority. - pcg64 generator(randomSeed()); - std::shuffle(queue_entries.begin(), queue_entries.end(), generator); - for (auto & [it, file_segment] : queue_entries) - { - /// Cache size changed and, for example, 1st file segment fits into cache - /// and 2nd file segment will fit only if first was evicted, then first will be removed and - /// file_segment_metadata is nullptr here. - if (file_segment.expired()) - continue; - - it->use(lock); + if (key_metadata->empty()) + metadata.removeKey(key, false, false); } } @@ -1021,36 +1063,18 @@ FileCache::~FileCache() void FileCache::deactivateBackgroundOperations() { - if (cleanup_task) - cleanup_task->deactivate(); - metadata.cancelDownload(); + metadata.cancelCleanup(); + for (auto & thread : download_threads) if (thread.joinable()) thread.join(); + + if (cleanup_thread && cleanup_thread->joinable()) + cleanup_thread->join(); } -void FileCache::cleanup() -{ - metadata.doCleanup(); -} - -void FileCache::cleanupThreadFunc() -{ - try - { - cleanup(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - chassert(false); - } - - cleanup_task->scheduleAfter(delayed_cleanup_interval_ms); -} - -FileSegmentsHolderPtr FileCache::getSnapshot() +FileSegments FileCache::getSnapshot() { assertInitialized(); #ifndef NDEBUG @@ -1063,19 +1087,19 @@ FileSegmentsHolderPtr FileCache::getSnapshot() for (const auto & [_, file_segment_metadata] : locked_key) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); }); - return std::make_unique(std::move(file_segments), /* complete_on_dtor */false); + return file_segments; } -FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key) +FileSegments FileCache::getSnapshot(const Key & key) { FileSegments file_segments; - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL); for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata()) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); - return std::make_unique(std::move(file_segments)); + return file_segments; } -FileSegmentsHolderPtr FileCache::dumpQueue() +FileSegments FileCache::dumpQueue() { assertInitialized(); @@ -1086,7 +1110,7 @@ FileSegmentsHolderPtr FileCache::dumpQueue() return PriorityIterationResult::CONTINUE; }, lockCache()); - return std::make_unique(std::move(file_segments)); + return file_segments; } std::vector FileCache::tryGetCachePaths(const Key & key) @@ -1161,4 +1185,15 @@ FileCache::QueryContextHolderPtr FileCache::getQueryContextHolder( return std::make_unique(query_id, this, std::move(context)); } +FileSegments FileCache::sync() +{ + FileSegments file_segments; + metadata.iterate([&](LockedKey & locked_key) + { + auto broken = locked_key.sync(); + file_segments.insert(file_segments.end(), broken.begin(), broken.end()); + }); + return file_segments; +} + } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 0e3b17baa2f..108f53cf2cc 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -12,7 +12,7 @@ #include -#include +#include #include #include #include @@ -30,6 +30,22 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +/// Track acquired space in cache during reservation +/// to make error messages when no space left more informative. +struct FileCacheReserveStat +{ + struct Stat + { + size_t releasable_size; + size_t releasable_count; + + size_t non_releasable_size; + size_t non_releasable_count; + }; + + std::unordered_map stat_by_kind; +}; + /// Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments. /// Different caching algorithms are implemented using IFileCachePriority. class FileCache : private boost::noncopyable @@ -42,7 +58,7 @@ public: using PriorityIterator = IFileCachePriority::Iterator; using PriorityIterationResult = IFileCachePriority::IterationResult; - explicit FileCache(const FileCacheSettings & settings); + FileCache(const std::string & cache_name, const FileCacheSettings & settings); ~FileCache(); @@ -83,13 +99,19 @@ public: FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); - /// Remove files by `key`. Removes files which might be used at the moment. + /// Remove file segment by `key` and `offset`. Throws if file segment does not exist. 
+ void removeFileSegment(const Key & key, size_t offset); + + /// Remove files by `key`. Throws if key does not exist. + void removeKey(const Key & key); + + /// Remove files by `key`. void removeKeyIfExists(const Key & key); - /// Removes files by `path`. Removes files which might be used at the moment. + /// Removes files by `path`. void removePathIfExists(const String & path); - /// Remove files by `key`. Will not remove files which are used at the moment. + /// Remove files by `key`. void removeAllReleasable(); std::vector tryGetCachePaths(const Key & key); @@ -100,15 +122,13 @@ public: size_t getMaxFileSegmentSize() const { return max_file_segment_size; } - bool tryReserve(FileSegment & file_segment, size_t size); + bool tryReserve(FileSegment & file_segment, size_t size, FileCacheReserveStat & stat); - FileSegmentsHolderPtr getSnapshot(); + FileSegments getSnapshot(); - FileSegmentsHolderPtr getSnapshot(const Key & key); + FileSegments getSnapshot(const Key & key); - FileSegmentsHolderPtr dumpQueue(); - - void cleanup(); + FileSegments dumpQueue(); void deactivateBackgroundOperations(); @@ -130,14 +150,16 @@ public: CacheGuard::Lock lockCache() const; + FileSegments sync(); + private: using KeyAndOffset = FileCacheKeyAndOffset; const size_t max_file_segment_size; const size_t bypass_cache_threshold = 0; - const size_t delayed_cleanup_interval_ms; const size_t boundary_alignment; const size_t background_download_threads; + const size_t metadata_download_threads; Poco::Logger * log; @@ -180,15 +202,16 @@ private: * A background cleanup task. * Clears removed cache entries from metadata. */ - BackgroundSchedulePool::TaskHolder cleanup_task; - std::vector download_threads; + std::unique_ptr cleanup_thread; void assertInitialized() const; void assertCacheCorrectness(); void loadMetadata(); + void loadMetadataImpl(); + void loadMetadataForKeys(const std::filesystem::path & keys_dir); FileSegments getImpl(const LockedKey & locked_key, const FileSegment::Range & range) const; @@ -213,8 +236,6 @@ private: FileSegment::State state, const CreateFileSegmentSettings & create_settings, const CacheGuard::Lock *); - - void cleanupThreadFunc(); }; } diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index 0f14da8c88f..ef04262f956 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -29,7 +29,7 @@ FileCachePtr FileCacheFactory::getOrCreate( auto it = caches_by_name.find(cache_name); if (it == caches_by_name.end()) { - auto cache = std::make_shared(file_cache_settings); + auto cache = std::make_shared(cache_name, file_cache_settings); it = caches_by_name.emplace( cache_name, std::make_unique(cache, file_cache_settings)).first; } diff --git a/src/Interpreters/Cache/FileCacheKey.cpp b/src/Interpreters/Cache/FileCacheKey.cpp index f97cdc058aa..75a8ac2934e 100644 --- a/src/Interpreters/Cache/FileCacheKey.cpp +++ b/src/Interpreters/Cache/FileCacheKey.cpp @@ -7,6 +7,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} FileCacheKey::FileCacheKey(const std::string & path) : key(sipHash128(path.data(), path.size())) @@ -28,4 +32,11 @@ FileCacheKey FileCacheKey::random() return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } +FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str) +{ + if (key_str.size() != 32) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid cache key hex: {}", key_str); + return FileCacheKey(unhexUInt(key_str.data())); +} + } 
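`FileCacheKey::fromKeyString`, added just above, refuses anything that is not exactly 32 hex characters before unhexing the directory name into a 128-bit key. A standalone sketch of the same validation, returning `std::nullopt` instead of throwing `BAD_ARGUMENTS`; `parseCacheKeyHex` is a hypothetical helper, not the ClickHouse API:

```cpp
#include <array>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

// Parse a 32-character hex string (a 128-bit cache key) into 16 raw bytes.
std::optional<std::array<uint8_t, 16>> parseCacheKeyHex(const std::string & s)
{
    if (s.size() != 32)
        return std::nullopt;

    auto nibble = [](char c) -> int
    {
        if (c >= '0' && c <= '9') return c - '0';
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return -1;
    };

    std::array<uint8_t, 16> key{};
    for (size_t i = 0; i < 16; ++i)
    {
        int hi = nibble(s[2 * i]), lo = nibble(s[2 * i + 1]);
        if (hi < 0 || lo < 0)
            return std::nullopt;
        key[i] = static_cast<uint8_t>(hi * 16 + lo);
    }
    return key;
}

int main()
{
    std::cout << parseCacheKeyHex("0123456789abcdef0123456789abcdef").has_value() << '\n'; // 1
    std::cout << parseCacheKeyHex("not-a-key").has_value() << '\n';                        // 0
}
```

Validating the length up front matters because the key directories are discovered on disk during metadata loading, where a stray directory name would otherwise be unhexed into garbage.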
diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index bab8359732c..e788cd5e7cd 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -21,6 +21,8 @@ struct FileCacheKey static FileCacheKey random(); bool operator==(const FileCacheKey & other) const { return key == other.key; } + + static FileCacheKey fromKeyString(const std::string & key_str); }; using FileCacheKeyAndOffset = std::pair; diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 455e9b44d0b..6f2f8c4b778 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -39,10 +39,10 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false); cache_hits_threshold = config.getUInt64(config_prefix + ".cache_hits_threshold", FILECACHE_DEFAULT_HITS_THRESHOLD); - enable_bypass_cache_with_threashold = config.getUInt64(config_prefix + ".enable_bypass_cache_with_threashold", false); + enable_bypass_cache_with_threshold = config.getUInt64(config_prefix + ".enable_bypass_cache_with_threshold", false); - if (config.has(config_prefix + ".bypass_cache_threashold")) - bypass_cache_threashold = parseWithSizeSuffix(config.getString(config_prefix + ".bypass_cache_threashold")); + if (config.has(config_prefix + ".bypass_cache_threshold")) + bypass_cache_threshold = parseWithSizeSuffix(config.getString(config_prefix + ".bypass_cache_threshold")); if (config.has(config_prefix + ".boundary_alignment")) boundary_alignment = parseWithSizeSuffix(config.getString(config_prefix + ".boundary_alignment")); @@ -50,7 +50,8 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & if (config.has(config_prefix + ".background_download_threads")) background_download_threads = config.getUInt(config_prefix + ".background_download_threads"); - delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS); + if (config.has(config_prefix + ".load_metadata_threads")) + load_metadata_threads = config.getUInt(config_prefix + ".load_metadata_threads"); } } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index e56d6fcc54d..9888b814a0b 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -22,13 +22,14 @@ struct FileCacheSettings size_t cache_hits_threshold = FILECACHE_DEFAULT_HITS_THRESHOLD; bool enable_filesystem_query_cache_limit = false; - bool enable_bypass_cache_with_threashold = false; - size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; - size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS; + bool enable_bypass_cache_with_threshold = false; + size_t bypass_cache_threshold = FILECACHE_BYPASS_THRESHOLD; size_t boundary_alignment = FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT; size_t background_download_threads = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS; + size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS; + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 4d4a1c1429c..3e7150ad253 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h 
+++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -7,10 +7,10 @@ namespace DB static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 2; +static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 1; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; -static constexpr size_t FILECACHE_DELAYED_CLEANUP_INTERVAL_MS = 1000 * 60; /// 1 min class FileCache; using FileCachePtr = std::shared_ptr; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index d191b2a803c..bb3216cb20e 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -151,23 +151,13 @@ void FileSegment::setQueueIterator(Priority::Iterator iterator) queue_iterator = iterator; } -size_t FileSegment::getFirstNonDownloadedOffset(bool sync) const +size_t FileSegment::getCurrentWriteOffset() const { - return range().left + getDownloadedSize(sync); + return range().left + downloaded_size; } -size_t FileSegment::getCurrentWriteOffset(bool sync) const +size_t FileSegment::getDownloadedSize() const { - return getFirstNonDownloadedOffset(sync); -} - -size_t FileSegment::getDownloadedSize(bool sync) const -{ - if (sync) - { - std::lock_guard lock(download_mutex); - return downloaded_size; - } return downloaded_size; } @@ -186,9 +176,7 @@ bool FileSegment::isDownloaded() const String FileSegment::getCallerId() { - if (!CurrentThread::isInitialized() - || !CurrentThread::get().getQueryContext() - || CurrentThread::getQueryId().empty()) + if (!CurrentThread::isInitialized() || CurrentThread::getQueryId().empty()) return "None:" + toString(getThreadId()); return std::string(CurrentThread::getQueryId()) + ":" + toString(getThreadId()); @@ -233,7 +221,7 @@ void FileSegment::resetDownloadingStateUnlocked(const FileSegmentGuard::Lock & l assert(isDownloaderUnlocked(lock)); assert(download_state == State::DOWNLOADING); - size_t current_downloaded_size = getDownloadedSize(true); + size_t current_downloaded_size = getDownloadedSize(); /// range().size() can equal 0 in case of write-though cache. 
if (!is_unbound && current_downloaded_size != 0 && current_downloaded_size == range().size()) setDownloadedUnlocked(lock); @@ -258,6 +246,9 @@ void FileSegment::resetDownloader() void FileSegment::resetDownloaderUnlocked(const FileSegmentGuard::Lock &) { + if (downloader_id.empty()) + return; + LOG_TEST(log, "Resetting downloader from {}", downloader_id); downloader_id.clear(); } @@ -266,7 +257,6 @@ void FileSegment::assertIsDownloaderUnlocked(const std::string & operation, cons { auto caller = getCallerId(); auto current_downloader = getDownloaderUnlocked(lock); - LOG_TEST(log, "Downloader id: {}, caller id: {}, operation: {}", current_downloader, caller, operation); if (caller != current_downloader) { @@ -345,14 +335,14 @@ void FileSegment::write(const char * from, size_t size, size_t offset) ErrorCodes::LOGICAL_ERROR, "Expected DOWNLOADING state, got {}", stateToString(download_state)); - size_t first_non_downloaded_offset = getFirstNonDownloadedOffset(false); + size_t first_non_downloaded_offset = getCurrentWriteOffset(); if (offset != first_non_downloaded_offset) throw Exception( ErrorCodes::LOGICAL_ERROR, "Attempt to write {} bytes to offset: {}, but current write offset is {}", size, offset, first_non_downloaded_offset); - size_t current_downloaded_size = getDownloadedSize(false); + size_t current_downloaded_size = getDownloadedSize(); chassert(reserved_size >= current_downloaded_size); size_t free_reserved_size = reserved_size - current_downloaded_size; @@ -379,13 +369,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) try { cache_writer->write(from, size); - - std::lock_guard lock(download_mutex); - cache_writer->next(); downloaded_size += size; - chassert(std::filesystem::file_size(file_segment_path) == downloaded_size); } catch (ErrnoException & e) @@ -416,7 +402,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) throw; } - chassert(getFirstNonDownloadedOffset(false) == offset + size); + chassert(getCurrentWriteOffset() == offset + size); } FileSegment::State FileSegment::wait(size_t offset) @@ -425,7 +411,7 @@ FileSegment::State FileSegment::wait(size_t offset) auto lock = lockFileSegment(); - if (downloader_id.empty() || offset < getCurrentWriteOffset(true)) + if (downloader_id.empty() || offset < getCurrentWriteOffset()) return download_state; if (download_state == State::EMPTY) @@ -441,7 +427,7 @@ FileSegment::State FileSegment::wait(size_t offset) [[maybe_unused]] const auto ok = cv.wait_for(lock, std::chrono::seconds(60), [&, this]() { - return download_state != State::DOWNLOADING || offset < getCurrentWriteOffset(false); + return download_state != State::DOWNLOADING || offset < getCurrentWriteOffset(); }); /// chassert(ok); } @@ -476,7 +462,7 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const return metadata->tryLock(); } -bool FileSegment::reserve(size_t size_to_reserve) +bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat) { if (!size_to_reserve) throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed"); @@ -490,7 +476,7 @@ bool FileSegment::reserve(size_t size_to_reserve) assertNotDetachedUnlocked(lock); assertIsDownloaderUnlocked("reserve", lock); - expected_downloaded_size = getDownloadedSize(false); + expected_downloaded_size = getDownloadedSize(); is_file_segment_size_exceeded = expected_downloaded_size + size_to_reserve > range().size(); if (is_file_segment_size_exceeded && !is_unbound) @@ -512,9 +498,8 @@ bool 
FileSegment::reserve(size_t size_to_reserve) size_t already_reserved_size = reserved_size - expected_downloaded_size; - bool reserved = already_reserved_size >= size_to_reserve; - if (reserved) - return reserved; + if (already_reserved_size >= size_to_reserve) + return true; size_to_reserve = size_to_reserve - already_reserved_size; @@ -523,7 +508,12 @@ bool FileSegment::reserve(size_t size_to_reserve) if (is_unbound && is_file_segment_size_exceeded) segment_range.right = range().left + expected_downloaded_size + size_to_reserve; - reserved = cache->tryReserve(*this, size_to_reserve); + /// if reserve_stat is not passed then use dummy stat and discard the result. + FileCacheReserveStat dummy_stat; + if (!reserve_stat) + reserve_stat = &dummy_stat; + + bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat); if (!reserved) setDownloadFailedUnlocked(lockFileSegment()); @@ -610,7 +600,7 @@ void FileSegment::complete() const bool is_downloader = isDownloaderUnlocked(segment_lock); const bool is_last_holder = locked_key->isLastOwnerOfFileSegment(offset()); - const size_t current_downloaded_size = getDownloadedSize(true); + const size_t current_downloaded_size = getDownloadedSize(); SCOPE_EXIT({ if (is_downloader) @@ -731,11 +721,10 @@ String FileSegment::getInfoForLogUnlocked(const FileSegmentGuard::Lock &) const info << "File segment: " << range().toString() << ", "; info << "key: " << key().toString() << ", "; info << "state: " << download_state.load() << ", "; - info << "downloaded size: " << getDownloadedSize(false) << ", "; + info << "downloaded size: " << getDownloadedSize() << ", "; info << "reserved size: " << reserved_size.load() << ", "; info << "downloader id: " << (downloader_id.empty() ? "None" : downloader_id) << ", "; - info << "current write offset: " << getCurrentWriteOffset(false) << ", "; - info << "first non-downloaded offset: " << getFirstNonDownloadedOffset(false) << ", "; + info << "current write offset: " << getCurrentWriteOffset() << ", "; info << "caller id: " << getCallerId() << ", "; info << "kind: " << toString(segment_kind) << ", "; info << "unbound: " << is_unbound; @@ -787,6 +776,8 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) cons { chassert(downloader_id.empty()); chassert(downloaded_size == reserved_size); + chassert(downloaded_size == range().size()); + chassert(downloaded_size > 0); chassert(std::filesystem::file_size(getPathInLocalCache()) > 0); chassert(queue_iterator); check_iterator(queue_iterator); @@ -840,7 +831,7 @@ FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment) CreateFileSegmentSettings(file_segment->getKind(), file_segment->is_unbound)); snapshot->hits_count = file_segment->getHitsCount(); - snapshot->downloaded_size = file_segment->getDownloadedSize(false); + snapshot->downloaded_size = file_segment->getDownloadedSize(); snapshot->download_state = file_segment->download_state.load(); snapshot->ref_count = file_segment.use_count(); @@ -878,8 +869,15 @@ void FileSegment::setDetachedState(const FileSegmentGuard::Lock & lock) key_metadata.reset(); cache = nullptr; queue_iterator = nullptr; - cache_writer.reset(); - remote_file_reader.reset(); + try + { + cache_writer.reset(); + remote_file_reader.reset(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } void FileSegment::detach(const FileSegmentGuard::Lock & lock, const LockedKey &) diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 44d95816915..8948b67fe2a 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -26,6 +26,7 @@ namespace DB { class ReadBufferFromFileBase; +struct FileCacheReserveStat; /* * FileSegmentKind is used to specify the eviction policy for file segments. @@ -177,11 +178,9 @@ public: size_t getRefCount() const { return ref_count; } - size_t getCurrentWriteOffset(bool sync) const; + size_t getCurrentWriteOffset() const; - size_t getFirstNonDownloadedOffset(bool sync) const; - - size_t getDownloadedSize(bool sync) const; + size_t getDownloadedSize() const; size_t getReservedSize() const; @@ -243,12 +242,7 @@ public: /// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded). /// Returns true if reservation was successful, false otherwise. - bool reserve(size_t size_to_reserve); - - /// Try to reserve at max `size_to_reserve` bytes. - /// Returns actual size reserved. It can be less than size_to_reserve in non strict mode. - /// In strict mode throws an error on attempt to reserve space too much space. - size_t tryReserve(size_t size_to_reserve, bool strict = false); + bool reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. void write(const char * from, size_t size, size_t offset); @@ -306,7 +300,6 @@ private: /// downloaded_size should always be less or equal to reserved_size std::atomic downloaded_size = 0; std::atomic reserved_size = 0; - mutable std::mutex download_mutex; mutable FileSegmentGuard segment_guard; std::weak_ptr key_metadata; diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 34c49653ab8..7de380c163b 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -88,6 +88,8 @@ public: /// From lowest to highest priority. 
virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0; + virtual void shuffle(const CacheGuard::Lock &) = 0; + private: const size_t max_size = 0; const size_t max_elements = 0; diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 18862e154da..5ecea95b1db 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -213,4 +214,16 @@ void LRUFileCachePriority::LRUFileCacheIterator::checkUsable() const throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator"); } +void LRUFileCachePriority::shuffle(const CacheGuard::Lock &) +{ + std::vector its; + its.reserve(queue.size()); + for (auto it = queue.begin(); it != queue.end(); ++it) + its.push_back(it); + pcg64 generator(randomSeed()); + std::shuffle(its.begin(), its.end(), generator); + for (auto & it : its) + queue.splice(queue.end(), queue, it); +} + } diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index e041e59a91a..89f86961811 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -4,6 +4,7 @@ #include #include #include +#include "Interpreters/Cache/Guards.h" namespace CurrentMetrics { @@ -40,6 +41,8 @@ public: void iterate(IterateFunc && func, const CacheGuard::Lock &) override; + void shuffle(const CacheGuard::Lock &) override; + private: void updateElementsCount(int64_t num); void updateSize(int64_t size); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 6a68d0f21f7..7a9321e4215 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -1,7 +1,6 @@ #include #include #include -#include "Common/Exception.h" #include #include #include @@ -11,6 +10,7 @@ namespace fs = std::filesystem; namespace CurrentMetrics { extern const Metric FilesystemCacheDownloadQueueElements; + extern const Metric FilesystemCacheDelayedCleanupElements; } namespace ProfileEvents @@ -25,6 +25,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_) @@ -58,14 +59,16 @@ size_t FileSegmentMetadata::size() const KeyMetadata::KeyMetadata( const Key & key_, const std::string & key_path_, - CleanupQueue & cleanup_queue_, - DownloadQueue & download_queue_, + CleanupQueuePtr cleanup_queue_, + DownloadQueuePtr download_queue_, Poco::Logger * log_, + std::shared_mutex & key_prefix_directory_mutex_, bool created_base_directory_) : key(key_) , key_path(key_path_) , cleanup_queue(cleanup_queue_) , download_queue(download_queue_) + , key_prefix_directory_mutex(key_prefix_directory_mutex_) , created_base_directory(created_base_directory_) , log(log_) { @@ -86,63 +89,56 @@ LockedKeyPtr KeyMetadata::lock() LockedKeyPtr KeyMetadata::tryLock() { - ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockKeyMicroseconds); - - auto locked = std::make_unique(shared_from_this()); + auto locked = lockNoStateCheck(); if (key_state == KeyMetadata::KeyState::ACTIVE) return locked; return nullptr; } +LockedKeyPtr KeyMetadata::lockNoStateCheck() +{ + ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockKeyMicroseconds); + return std::make_unique(shared_from_this()); +} + bool KeyMetadata::createBaseDirectory() { if 
(!created_base_directory.exchange(true)) { try { + std::shared_lock lock(key_prefix_directory_mutex); fs::create_directories(key_path); } - catch (...) + catch (const fs::filesystem_error & e) { - /// Avoid errors like - /// std::__1::__fs::filesystem::filesystem_error: filesystem error: in create_directories: No space left on device - /// and mark file segment with SKIP_CACHE state - tryLogCurrentException(__PRETTY_FUNCTION__); created_base_directory = false; - return false; + + if (e.code() == std::errc::no_space_on_device) + { + LOG_TRACE(log, "Failed to create base directory for key {}, " + "because no space left on device", key); + + return false; + } + throw; } } return true; } -std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment) +std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment) const { return fs::path(key_path) / CacheMetadata::getFileNameForFileSegment(file_segment.offset(), file_segment.getKind()); } -class CleanupQueue -{ - friend struct CacheMetadata; -public: - void add(const FileCacheKey & key); - void remove(const FileCacheKey & key); - size_t getSize() const; - -private: - bool tryPop(FileCacheKey & key); - - std::unordered_set keys; - mutable std::mutex mutex; -}; - - CacheMetadata::CacheMetadata(const std::string & path_) : path(path_) - , cleanup_queue(std::make_unique()) - , download_queue(std::make_unique()) + , cleanup_queue(std::make_shared()) + , download_queue(std::make_shared()) , log(&Poco::Logger::get("CacheMetadata")) { } @@ -183,38 +179,20 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( KeyNotFoundPolicy key_not_found_policy, bool is_initial_load) { - KeyMetadataPtr key_metadata; - { - auto lock = lockMetadata(); - - auto it = find(key); - if (it == end()) - { - if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) - return nullptr; - - it = emplace( - key, std::make_shared( - key, getPathForKey(key), *cleanup_queue, *download_queue, log, is_initial_load)).first; - } - - key_metadata = it->second; - } + auto key_metadata = getKeyMetadata(key, key_not_found_policy, is_initial_load); + if (!key_metadata) + return nullptr; { - LockedKeyPtr locked_metadata; - { - ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockKeyMicroseconds); - locked_metadata = std::make_unique(key_metadata); - } - + auto locked_metadata = key_metadata->lockNoStateCheck(); const auto key_state = locked_metadata->getKeyState(); + if (key_state == KeyMetadata::KeyState::ACTIVE) return locked_metadata; if (key_not_found_policy == KeyNotFoundPolicy::THROW) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); + else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) @@ -237,17 +215,37 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( return lockKeyMetadata(key, key_not_found_policy); } -void CacheMetadata::iterate(IterateCacheMetadataFunc && func) +KeyMetadataPtr CacheMetadata::getKeyMetadata( + const Key & key, + KeyNotFoundPolicy key_not_found_policy, + bool is_initial_load) { auto lock = lockMetadata(); - for (const auto & [key, key_metadata] : *this) - { - LockedKeyPtr locked_key; - { - ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockKeyMicroseconds); - locked_key = 
std::make_unique(key_metadata); - } + auto it = find(key); + if (it == end()) + { + if (key_not_found_policy == KeyNotFoundPolicy::THROW) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); + else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); + else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) + return nullptr; + + it = emplace( + key, std::make_shared( + key, getPathForKey(key), cleanup_queue, download_queue, log, key_prefix_directory_mutex, is_initial_load)).first; + } + + return it->second; +} + +void CacheMetadata::iterate(IterateFunc && func) +{ + auto lock = lockMetadata(); + for (auto & [key, key_metadata] : *this) + { + auto locked_key = key_metadata->lockNoStateCheck(); const auto key_state = locked_key->getKeyState(); if (key_state == KeyMetadata::KeyState::ACTIVE) @@ -255,8 +253,7 @@ void CacheMetadata::iterate(IterateCacheMetadataFunc && func) func(*locked_key); continue; } - - if (key_state == KeyMetadata::KeyState::REMOVING) + else if (key_state == KeyMetadata::KeyState::REMOVING) continue; throw Exception( @@ -264,69 +261,185 @@ void CacheMetadata::iterate(IterateCacheMetadataFunc && func) } } -void CacheMetadata::doCleanup() +void CacheMetadata::removeAllKeys(bool if_releasable) { auto lock = lockMetadata(); - - FileCacheKey cleanup_key; - while (cleanup_queue->tryPop(cleanup_key)) + for (auto it = begin(); it != end();) { - auto it = find(cleanup_key); - if (it == end()) - continue; - - LockedKeyPtr locked_metadata; + auto locked_key = it->second->lockNoStateCheck(); + if (locked_key->getKeyState() == KeyMetadata::KeyState::ACTIVE) { - ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockKeyMicroseconds); - locked_metadata = std::make_unique(it->second); - } - - const auto key_state = locked_metadata->getKeyState(); - if (key_state == KeyMetadata::KeyState::ACTIVE) - { - /// Key was added back to cache after we submitted it to removal queue. - continue; - } - - chassert(it->second->empty()); - locked_metadata->markAsRemoved(); - erase(it); - LOG_DEBUG(log, "Key {} is removed from metadata", cleanup_key); - - const fs::path key_directory = getPathForKey(cleanup_key); - const fs::path key_prefix_directory = key_directory.parent_path(); - - try - { - if (fs::exists(key_directory)) - fs::remove_all(key_directory); - } - catch (...) - { - LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true)); - chassert(false); - continue; - } - - try - { - if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory)) - fs::remove(key_prefix_directory); - } - catch (const fs::filesystem_error & e) - { - /// Key prefix directory can become non-empty just now, it is expected. - if (e.code() == std::errc::directory_not_empty) + bool removed_all = locked_key->removeAllFileSegments(if_releasable); + if (removed_all) + { + it = removeEmptyKey(it, *locked_key, lock); continue; - LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true)); - chassert(false); + } } - catch (...) 
+ ++it; + } +} + +void CacheMetadata::removeKey(const Key & key, bool if_exists, bool if_releasable) +{ + auto metadata_lock = lockMetadata(); + + auto it = find(key); + if (it == end()) + { + if (if_exists) + return; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key: {}", key); + } + + auto locked_key = it->second->lockNoStateCheck(); + auto state = locked_key->getKeyState(); + if (state != KeyMetadata::KeyState::ACTIVE) + { + if (if_exists) + return; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key: {} (state: {})", key, magic_enum::enum_name(state)); + } + + bool removed_all = locked_key->removeAllFileSegments(if_releasable); + if (removed_all) + removeEmptyKey(it, *locked_key, metadata_lock); +} + +CacheMetadata::iterator CacheMetadata::removeEmptyKey(iterator it, LockedKey & locked_key, const CacheMetadataGuard::Lock &) +{ + const auto & key = locked_key.getKey(); + + if (!it->second->empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove non-empty key: {}", key); + + locked_key.markAsRemoved(); + auto next_it = erase(it); + + LOG_DEBUG(log, "Key {} is removed from metadata", key); + + const fs::path key_directory = getPathForKey(key); + const fs::path key_prefix_directory = key_directory.parent_path(); + + try + { + if (fs::exists(key_directory)) + fs::remove_all(key_directory); + } + catch (...) + { + LOG_ERROR(log, "Error while removing key {}: {}", key, getCurrentExceptionMessage(true)); + chassert(false); + return next_it; + } + + try + { + std::unique_lock mutex(key_prefix_directory_mutex); + if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory)) + fs::remove(key_prefix_directory); + } + catch (...) + { + LOG_ERROR(log, "Error while removing key {}: {}", key, getCurrentExceptionMessage(true)); + chassert(false); + } + return next_it; +} + +class CleanupQueue +{ + friend struct CacheMetadata; +public: + void add(const FileCacheKey & key) + { + bool inserted; { - LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true)); - chassert(false); + std::lock_guard lock(mutex); + if (cancelled) + return; + inserted = keys.insert(key).second; + } + /// There is an invariant that key cannot be submitted for removal if it is already in removal queue. + /// Because + /// 1) when submit key to removal it acquires state REMOVING and we submit key for removal only if it has ACTIVE state. + /// 2) if a key is added to cache and it was found in removal queue - it will be removed from the queue and get state ACTIVE. + /// and both these actions are synchronized by the same KeyGuard. 
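/// [Editor's aside, not part of this patch] chassert() only aborts in builds compiled with
/// ABORT_ON_LOGICAL_ERROR (debug/sanitizer builds) and is a no-op in release builds, so the
/// `if (inserted)` guard below keeps the metric update and the notify_one() call consistent
/// even if the invariant described above were ever violated.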
+ chassert(inserted); + if (inserted) + { + CurrentMetrics::add(CurrentMetrics::FilesystemCacheDelayedCleanupElements); + cv.notify_one(); } } + + void cancel() + { + { + std::lock_guard lock(mutex); + cancelled = true; + } + cv.notify_all(); + } + +private: + std::unordered_set keys; + mutable std::mutex mutex; + std::condition_variable cv; + bool cancelled = false; +}; + +void CacheMetadata::cleanupThreadFunc() +{ + while (true) + { + Key key; + { + std::unique_lock lock(cleanup_queue->mutex); + if (cleanup_queue->cancelled) + return; + + auto & keys = cleanup_queue->keys; + if (keys.empty()) + { + cleanup_queue->cv.wait(lock, [&](){ return cleanup_queue->cancelled || !keys.empty(); }); + if (cleanup_queue->cancelled) + return; + } + + auto it = keys.begin(); + key = *it; + keys.erase(it); + } + + CurrentMetrics::sub(CurrentMetrics::FilesystemCacheDelayedCleanupElements); + + try + { + auto lock = lockMetadata(); + + auto it = find(key); + if (it == end()) + continue; + + auto locked_key = it->second->lockNoStateCheck(); + if (locked_key->getKeyState() == KeyMetadata::KeyState::REMOVING) + { + removeEmptyKey(it, *locked_key, lock); + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +void CacheMetadata::cancelCleanup() +{ + cleanup_queue->cancel(); } class DownloadQueue @@ -337,6 +450,8 @@ public: { { std::lock_guard lock(mutex); + if (cancelled) + return; queue.push(DownloadInfo{file_segment->key(), file_segment->offset(), file_segment}); } @@ -360,6 +475,9 @@ private: struct DownloadInfo { + DownloadInfo(const CacheMetadata::Key & key_, const size_t & offset_, const std::weak_ptr & file_segment_) + : key(key_), offset(offset_), file_segment(file_segment_) {} + CacheMetadata::Key key; size_t offset; /// We keep weak pointer to file segment @@ -382,14 +500,14 @@ void CacheMetadata::downloadThreadFunc() { std::unique_lock lock(download_queue->mutex); - if (download_queue->cancelled) return; if (download_queue->queue.empty()) { - download_queue->cv.wait(lock); - continue; + download_queue->cv.wait(lock, [&](){ return download_queue->cancelled || !download_queue->queue.empty(); }); + if (download_queue->cancelled) + return; } auto entry = download_queue->queue.front(); @@ -452,12 +570,12 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalset(memory->data(), memory->size()); } - size_t offset = file_segment.getCurrentWriteOffset(false); + size_t offset = file_segment.getCurrentWriteOffset(); if (offset != static_cast(reader->getPosition())) reader->seek(offset, SEEK_SET); @@ -492,7 +610,7 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalempty() || getKeyState() != KeyMetadata::KeyState::ACTIVE) return; + /// If state if ACTIVE and key turns out empty - we submit it for delayed removal. + /// Because we do not want to always lock all cache metadata lock, when we remove files segments. + /// but sometimes we do - we remove the empty key without delay - then key state + /// will be REMOVED here and we will return in the check above. + /// See comment near cleanupThreadFunc() for more details. 
+ key_metadata->key_state = KeyMetadata::KeyState::REMOVING; LOG_DEBUG(key_metadata->log, "Submitting key {} for removal", getKey()); - key_metadata->cleanup_queue.add(getKey()); + key_metadata->cleanup_queue->add(getKey()); } void LockedKey::removeFromCleanupQueue() @@ -558,13 +682,15 @@ bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const return file_segment_metadata->file_segment.use_count() == 2; } -void LockedKey::removeAllReleasable() +bool LockedKey::removeAllFileSegments(bool if_releasable) { + bool removed_all = true; for (auto it = key_metadata->begin(); it != key_metadata->end();) { - if (!it->second->releasable()) + if (if_releasable && !it->second->releasable()) { ++it; + removed_all = false; continue; } else if (it->second->evicting()) @@ -575,50 +701,82 @@ void LockedKey::removeAllReleasable() /// so if we remove file segment now, we break the freeable_count /// calculation in tryReserve. ++it; + removed_all = false; continue; } auto file_segment = it->second->file_segment; it = removeFileSegment(file_segment->offset(), file_segment->lock()); } + return removed_all; } -KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock) +KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, bool can_be_broken) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset); auto file_segment = it->second->file_segment; + return removeFileSegmentImpl(it, file_segment->lock(), can_be_broken); +} + +KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock, bool can_be_broken) +{ + auto it = key_metadata->find(offset); + if (it == key_metadata->end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {} in key {}", offset, getKey()); + + return removeFileSegmentImpl(it, segment_lock, can_be_broken); +} + +KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock, bool can_be_broken) +{ + auto file_segment = it->second->file_segment; LOG_DEBUG( key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}", - getKey(), offset, file_segment->reserved_size); + getKey(), file_segment->offset(), file_segment->reserved_size); - chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); + chassert(can_be_broken || file_segment->assertCorrectnessUnlocked(segment_lock)); if (file_segment->queue_iterator) file_segment->queue_iterator->invalidate(); file_segment->detach(segment_lock, *this); - const auto path = key_metadata->getFileSegmentPath(*file_segment); - bool exists = fs::exists(path); - if (exists) + try { - fs::remove(path); + const auto path = key_metadata->getFileSegmentPath(*file_segment); + bool exists = fs::exists(path); + if (exists) + { + fs::remove(path); - /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. - int flags = file_segment->getFlagsForLocalRead(); - /// Files are created with flags from file_segment->getFlagsForLocalRead() - /// plus optionally O_DIRECT is added, depends on query setting, so remove both. - OpenedFileCache::instance().remove(path, flags); - OpenedFileCache::instance().remove(path, flags | O_DIRECT); + /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. 
+ int flags = file_segment->getFlagsForLocalRead(); + /// Files are created with flags from file_segment->getFlagsForLocalRead() + /// plus optionally O_DIRECT is added, depends on query setting, so remove both. + OpenedFileCache::instance().remove(path, flags); + OpenedFileCache::instance().remove(path, flags | O_DIRECT); - LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path); + LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path); + } + else if (file_segment->downloaded_size && !can_be_broken) + { +#ifdef ABORT_ON_LOGICAL_ERROR + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); +#else + LOG_WARNING(key_metadata->log, "Expected path {} to exist, while removing {}:{}", + path, getKey(), file_segment->offset()); +#endif + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); } - else if (file_segment->downloaded_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); return key_metadata->erase(it); } @@ -636,7 +794,7 @@ void LockedKey::shrinkFileSegmentToDownloadedSize( const auto & file_segment = metadata->file_segment; chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); - const size_t downloaded_size = file_segment->getDownloadedSize(false); + const size_t downloaded_size = file_segment->getDownloadedSize(); if (downloaded_size == file_segment->range().size()) { throw Exception( @@ -664,7 +822,7 @@ void LockedKey::addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock & auto it = key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is not offset {}", offset); - key_metadata->download_queue.add(it->second->file_segment); + key_metadata->download_queue->add(it->second->file_segment); } std::optional LockedKey::hasIntersectingRange(const FileSegment::Range & range) const @@ -734,35 +892,62 @@ std::string LockedKey::toString() const return result; } -void CleanupQueue::add(const FileCacheKey & key) +FileSegments LockedKey::sync() { - std::lock_guard lock(mutex); - keys.insert(key); -} + FileSegments broken; + for (auto it = key_metadata->begin(); it != key_metadata->end();) + { + if (it->second->evicting() || !it->second->releasable()) + { + ++it; + continue; + } -void CleanupQueue::remove(const FileCacheKey & key) -{ - std::lock_guard lock(mutex); - bool erased = keys.erase(key); - if (!erased) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key {} in removal queue", key); -} + auto file_segment = it->second->file_segment; + if (file_segment->isDetached()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "File segment has unexpected state: DETACHED ({})", file_segment->getInfoForLog()); + } -bool CleanupQueue::tryPop(FileCacheKey & key) -{ - std::lock_guard lock(mutex); - if (keys.empty()) - return false; - auto it = keys.begin(); - key = *it; - keys.erase(it); - return true; -} + if (file_segment->getDownloadedSize() == 0) + { + ++it; + continue; + } -size_t CleanupQueue::getSize() const -{ - std::lock_guard lock(mutex); - return keys.size(); + const auto & path = key_metadata->getFileSegmentPath(*file_segment); + if (!fs::exists(path)) + { + LOG_WARNING( + key_metadata->log, + "File segment has DOWNLOADED state, but file does not exist ({})", + file_segment->getInfoForLog()); + + broken.push_back(FileSegment::getSnapshot(file_segment)); + it = removeFileSegment(file_segment->offset(), file_segment->lock(), /* can_be_broken */true); + continue; + } + + const size_t 
actual_size = fs::file_size(path); + const size_t expected_size = file_segment->getDownloadedSize(); + + if (actual_size == expected_size) + { + ++it; + continue; + } + + LOG_WARNING( + key_metadata->log, + "File segment has unexpected size. Having {}, expected {} ({})", + actual_size, expected_size, file_segment->getInfoForLog()); + + broken.push_back(FileSegment::getSnapshot(file_segment)); + it = removeFileSegment(file_segment->offset(), file_segment->lock(), /* can_be_broken */false); + } + return broken; } } diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 503c19f4150..8645dd80ba8 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -47,9 +48,10 @@ struct KeyMetadata : public std::map, KeyMetadata( const Key & key_, const std::string & key_path_, - CleanupQueue & cleanup_queue_, - DownloadQueue & download_queue_, + CleanupQueuePtr cleanup_queue_, + DownloadQueuePtr download_queue_, Poco::Logger * log_, + std::shared_mutex & key_prefix_directory_mutex_, bool created_base_directory_ = false); enum class KeyState @@ -67,15 +69,18 @@ struct KeyMetadata : public std::map, /// Return nullptr if key has non-ACTIVE state. LockedKeyPtr tryLock(); + LockedKeyPtr lockNoStateCheck(); + bool createBaseDirectory(); - std::string getFileSegmentPath(const FileSegment & file_segment); + std::string getFileSegmentPath(const FileSegment & file_segment) const; private: KeyState key_state = KeyState::ACTIVE; KeyGuard guard; - CleanupQueue & cleanup_queue; - DownloadQueue & download_queue; + const CleanupQueuePtr cleanup_queue; + const DownloadQueuePtr download_queue; + std::shared_mutex & key_prefix_directory_mutex; std::atomic created_base_directory = false; Poco::Logger * log; }; @@ -87,7 +92,7 @@ struct CacheMetadata : public std::unordered_map, { public: using Key = FileCacheKey; - using IterateCacheMetadataFunc = std::function; + using IterateFunc = std::function; explicit CacheMetadata(const std::string & path_); @@ -101,21 +106,40 @@ public: String getPathForKey(const Key & key) const; static String getFileNameForFileSegment(size_t offset, FileSegmentKind segment_kind); - void iterate(IterateCacheMetadataFunc && func); + void iterate(IterateFunc && func); enum class KeyNotFoundPolicy { THROW, + THROW_LOGICAL, CREATE_EMPTY, RETURN_NULL, }; + KeyMetadataPtr getKeyMetadata( + const Key & key, + KeyNotFoundPolicy key_not_found_policy, + bool is_initial_load = false); + LockedKeyPtr lockKeyMetadata( const Key & key, KeyNotFoundPolicy key_not_found_policy, bool is_initial_load = false); - void doCleanup(); + void removeKey(const Key & key, bool if_exists, bool if_releasable); + void removeAllKeys(bool if_releasable); + + void cancelCleanup(); + + /// Firstly, this cleanup does not delete cache files, + /// but only empty keys from cache_metadata_map and key (prefix) directories from fs. + /// Secondly, it deletes those only if arose as a result of + /// (1) eviction in FileCache::tryReserve(); + /// (2) removal of cancelled non-downloaded file segments after FileSegment::complete(). + /// which does not include removal of cache files because of FileCache::removeKey/removeAllKeys, + /// triggered by removal of source files from objects storage. + /// E.g. number of elements submitted to background cleanup should remain low. 
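/// [Editor's aside, not part of this patch] A rough sketch of how the owner is expected to
/// drive this pair, assuming the cache's cleanup_thread member is a ThreadFromGlobalPool
/// (the exact member type is not visible in this hunk):
///
///     cleanup_thread = std::make_unique<ThreadFromGlobalPool>([this] { metadata.cleanupThreadFunc(); });
///     ...
///     metadata.cancelCleanup();   /// wakes the thread from its condition variable
///     cleanup_thread->join();     /// the loop then observes the cancelled flag and returns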
+ void cleanupThreadFunc(); void downloadThreadFunc(); @@ -125,11 +149,13 @@ private: CacheMetadataGuard::Lock lockMetadata() const; const std::string path; /// Cache base path mutable CacheMetadataGuard guard; - const CleanupQueuePtr cleanup_queue; - const DownloadQueuePtr download_queue; + CleanupQueuePtr cleanup_queue; + DownloadQueuePtr download_queue; + std::shared_mutex key_prefix_directory_mutex; Poco::Logger * log; void downloadImpl(FileSegment & file_segment, std::optional> & memory); + iterator removeEmptyKey(iterator it, LockedKey &, const CacheMetadataGuard::Lock &); }; @@ -169,9 +195,10 @@ struct LockedKey : private boost::noncopyable std::shared_ptr getKeyMetadata() const { return key_metadata; } std::shared_ptr getKeyMetadata() { return key_metadata; } - void removeAllReleasable(); + bool removeAllFileSegments(bool if_releasable = true); - KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &); + KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &, bool can_be_broken = false); + KeyMetadata::iterator removeFileSegment(size_t offset, bool can_be_broken = false); void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); @@ -185,9 +212,13 @@ struct LockedKey : private boost::noncopyable void markAsRemoved(); + FileSegments sync(); + std::string toString() const; private: + KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &, bool can_be_broken = false); + const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. }; diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index a6c509e8bb1..972b6e4a3cb 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -191,7 +190,7 @@ QueryCache::Writer::Writer( if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key)) { skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string); + LOG_TRACE(logger, "Skipped insert (non-stale entry found), query: {}", key.query_string); } } @@ -263,14 +262,14 @@ void QueryCache::Writer::finalizeWrite() if (std::chrono::duration_cast(std::chrono::system_clock::now() - query_start_time) < min_query_runtime) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string); + LOG_TRACE(logger, "Skipped insert (query not expensive enough), query: {}", key.query_string); return; } if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key)) { /// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string); + LOG_TRACE(logger, "Skipped insert (non-stale entry found), query: {}", key.query_string); return; } @@ -353,12 +352,14 @@ void QueryCache::Writer::finalizeWrite() if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows)) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", 
new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string); + LOG_TRACE(logger, "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string); return; } cache.set(key, query_result); + LOG_TRACE(logger, "Stored result of query: {}", key.query_string); + was_finalized = true; } @@ -388,7 +389,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar if (!entry.has_value()) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string); + LOG_TRACE(logger, "No entry found for query {}", key.query_string); return; } @@ -397,13 +398,13 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar if (!entry_key.is_shared && entry_key.user_name != key.user_name) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string); + LOG_TRACE(logger, "Inaccessible entry found for query {}", key.query_string); return; } if (IsStale()(entry_key)) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string); + LOG_TRACE(logger, "Stale entry found for query {}", key.query_string); return; } @@ -441,7 +442,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes); } - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string); + LOG_TRACE(logger, "Entry found for query {}", key.query_string); } bool QueryCache::Reader::hasCacheEntryForKey() const @@ -471,6 +472,21 @@ std::unique_ptr QueryCache::Reader::getSourceExtremes() return std::move(source_from_chunks_extremes); } +QueryCache::QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_) + : cache(std::make_unique>(std::make_unique())) +{ + updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes_, max_entry_size_in_rows_); +} + +void QueryCache::updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_) +{ + std::lock_guard lock(mutex); + cache.setMaxSizeInBytes(max_size_in_bytes); + cache.setMaxCount(max_entries); + max_entry_size_in_bytes = max_entry_size_in_bytes_; + max_entry_size_in_rows = max_entry_size_in_rows_; +} + QueryCache::Reader QueryCache::createReader(const Key & key) { std::lock_guard lock(mutex); @@ -488,12 +504,21 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size); } -void QueryCache::reset() +void QueryCache::clear() { - cache.reset(); + cache.clear(); std::lock_guard lock(mutex); times_executed.clear(); - cache_size_in_bytes = 0; +} + +size_t QueryCache::sizeInBytes() const +{ + return cache.sizeInBytes(); +} + +size_t QueryCache::count() const +{ + return cache.count(); } size_t QueryCache::recordQueryRun(const Key & key) @@ -501,7 +526,7 @@ size_t QueryCache::recordQueryRun(const Key & key) std::lock_guard lock(mutex); size_t times = ++times_executed[key]; // Regularly drop times_executed to avoid DOS-by-unlimited-growth. 
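// [Editor's aside, not part of this patch] The `uz` suffix below is the C++23 integer-literal
// suffix for std::size_t, so `auto` deduces the same unsigned size type as the old explicit
// `size_t` declaration.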
- static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000; + static constexpr auto TIMES_EXECUTED_MAX_SIZE = 10'000uz; if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE) times_executed.clear(); return times; @@ -512,23 +537,4 @@ std::vector QueryCache::dump() const return cache.dump(); } -QueryCache::QueryCache() - : cache(std::make_unique>(std::make_unique())) -{ -} - -void QueryCache::updateConfiguration(const Poco::Util::AbstractConfiguration & config) -{ - std::lock_guard lock(mutex); - - size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", 1_GiB); - cache.setMaxSize(max_size_in_bytes); - - size_t max_entries = config.getUInt64("query_cache.max_entries", 1024); - cache.setMaxCount(max_entries); - - max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", 1_MiB); - max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows_in_rows", 30'000'000); -} - } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c24b09c8e46..d3c98dbd97a 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -1,10 +1,10 @@ #pragma once #include +#include #include #include #include -#include #include #include @@ -24,6 +24,14 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); class QueryCache { public: + enum class Usage + { + Unknown, /// we don't know what what happened + None, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache + }; + /// Represents a query result in the cache. struct Key { @@ -102,9 +110,6 @@ private: /// query --> query result using Cache = CacheBase; - /// query --> query execution count - using TimesExecuted = std::unordered_map; - public: /// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()). /// @@ -140,6 +145,7 @@ public: Cache::MappedPtr query_result TSA_GUARDED_BY(mutex) = std::make_shared(); std::atomic skip_insert = false; bool was_finalized = false; + Poco::Logger * logger = &Poco::Logger::get("QueryCache"); Writer(Cache & cache_, const Key & key_, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_, @@ -166,17 +172,21 @@ public: std::unique_ptr source_from_chunks; std::unique_ptr source_from_chunks_totals; std::unique_ptr source_from_chunks_extremes; + Poco::Logger * logger = &Poco::Logger::get("QueryCache"); friend class QueryCache; /// for createReader() }; - QueryCache(); + QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_); - void updateConfiguration(const Poco::Util::AbstractConfiguration & config); + void updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_); Reader createReader(const Key & key); Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota); - void reset(); + void clear(); + + size_t sizeInBytes() const; + size_t count() const; /// Record new execution of query represented by key. Returns number of executions so far. 
size_t recordQueryRun(const Key & key); @@ -185,17 +195,18 @@ public: std::vector dump() const; private: - Cache cache; + Cache cache; /// has its own locking --> not protected by mutex mutable std::mutex mutex; + + /// query --> query execution count + using TimesExecuted = std::unordered_map; TimesExecuted times_executed TSA_GUARDED_BY(mutex); /// Cache configuration size_t max_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0; size_t max_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0; - size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// Updated in each cache insert/delete - friend class StorageSystemQueryCache; }; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index d50289a5728..9ba7f0b6d1b 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -44,11 +45,25 @@ void WriteBufferToFileSegment::nextImpl() size_t bytes_to_write = offset(); + FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. - bool ok = file_segment->reserve(bytes_to_write); + bool ok = file_segment->reserve(bytes_to_write, &reserve_stat); + if (!ok) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve space for the file cache ({})", file_segment->getInfoForLog()); + { + String reserve_stat_msg; + for (const auto & [kind, stat] : reserve_stat.stat_by_kind) + reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", + toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); + + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", + bytes_to_write, + file_segment->getKind() == FileSegmentKind::Temporary ? 
"temporary file" : "the file in cache", + reserve_stat_msg, + file_segment->getInfoForLog() + ); + } try { @@ -71,4 +86,10 @@ std::shared_ptr WriteBufferToFileSegment::getReadBufferImpl() return std::make_shared(file_segment->getPathInLocalCache()); } +WriteBufferToFileSegment::~WriteBufferToFileSegment() +{ + /// To be sure that file exists before destructor of segment_holder is called + WriteBufferFromFileDecorator::finalize(); +} + } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index d39772873f7..21565e297c9 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -16,6 +16,7 @@ public: explicit WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder); void nextImpl() override; + ~WriteBufferToFileSegment() override; private: diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 413ff1db6bc..92ef5a0d159 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -9,6 +9,7 @@ #include "config_version.h" +#include namespace DB { @@ -18,7 +19,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) @@ -194,11 +194,29 @@ void ClientInfo::setInitialQuery() query_kind = QueryKind::INITIAL_QUERY; fillOSUserHostNameAndVersionInfo(); if (client_name.empty()) - client_name = DBMS_NAME; + client_name = VERSION_NAME; else - client_name = (DBMS_NAME " ") + client_name; + client_name = (VERSION_NAME " ") + client_name; } +bool ClientInfo::clientVersionEquals(const ClientInfo & other, bool compare_patch) const +{ + bool patch_equals = compare_patch ? 
client_version_patch == other.client_version_patch : true; + return client_version_major == other.client_version_major && + client_version_minor == other.client_version_minor && + patch_equals && + client_tcp_protocol_version == other.client_tcp_protocol_version; +} + +String ClientInfo::getVersionStr() const +{ + return std::format("{}.{}.{} ({})", client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); +} + +VersionNumber ClientInfo::getVersionNumber() const +{ + return VersionNumber(client_version_major, client_version_minor, client_version_patch); +} void ClientInfo::fillOSUserHostNameAndVersionInfo() { @@ -210,11 +228,33 @@ void ClientInfo::fillOSUserHostNameAndVersionInfo() client_hostname = getFQDNOrHostName(); - client_version_major = DBMS_VERSION_MAJOR; - client_version_minor = DBMS_VERSION_MINOR; - client_version_patch = DBMS_VERSION_PATCH; + client_version_major = VERSION_MAJOR; + client_version_minor = VERSION_MINOR; + client_version_patch = VERSION_PATCH; client_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION; } +String toString(ClientInfo::Interface interface) +{ + switch (interface) + { + case ClientInfo::Interface::TCP: + return "TCP"; + case ClientInfo::Interface::HTTP: + return "HTTP"; + case ClientInfo::Interface::GRPC: + return "GRPC"; + case ClientInfo::Interface::MYSQL: + return "MYSQL"; + case ClientInfo::Interface::POSTGRESQL: + return "POSTGRESQL"; + case ClientInfo::Interface::LOCAL: + return "LOCAL"; + case ClientInfo::Interface::TCP_INTERSERVER: + return "TCP_INTERSERVER"; + } + + return std::format("Unknown {}!\n", static_cast(interface)); +} } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 5c5a284d63b..70524333047 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -48,7 +49,6 @@ public: SECONDARY_QUERY = 2, /// Query that was initiated by another query for distributed or ON CLUSTER query execution. }; - QueryKind query_kind = QueryKind::NO_QUERY; /// Current values are not serialized, because it is passed separately. @@ -135,8 +135,15 @@ public: /// Initialize parameters on client initiating query. 
void setInitialQuery(); + bool clientVersionEquals(const ClientInfo & other, bool compare_patch) const; + + String getVersionStr() const; + VersionNumber getVersionNumber() const; + private: void fillOSUserHostNameAndVersionInfo(); }; +String toString(ClientInfo::Interface interface); + } diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 891586d88b6..82c3d48bc05 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -420,8 +420,6 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, if (address.is_local) info.local_addresses.push_back(address); - info.all_addresses.push_back(address); - auto pool = ConnectionPoolFactory::instance().get( static_cast(settings.distributed_connections_pool_size), address.host_name, address.port, @@ -564,7 +562,6 @@ void Cluster::addShard(const Settings & settings, Addresses && addresses, bool t ShardInfoInsertPathForInternalReplication && insert_paths, UInt32 weight, bool internal_replication) { Addresses shard_local_addresses; - Addresses shard_all_addresses; ConnectionPoolPtrs all_replicas_pools; all_replicas_pools.reserve(addresses.size()); @@ -582,7 +579,6 @@ void Cluster::addShard(const Settings & settings, Addresses && addresses, bool t all_replicas_pools.emplace_back(replica_pool); if (replica.is_local && !treat_local_as_remote) shard_local_addresses.push_back(replica); - shard_all_addresses.push_back(replica); } ConnectionPoolWithFailoverPtr shard_pool = std::make_shared( all_replicas_pools, settings.load_balancing, @@ -596,7 +592,6 @@ void Cluster::addShard(const Settings & settings, Addresses && addresses, bool t current_shard_num, weight, std::move(shard_local_addresses), - std::move(shard_all_addresses), std::move(shard_pool), std::move(all_replicas_pools), internal_replication @@ -720,8 +715,6 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti if (address.is_local) info.local_addresses.push_back(address); - info.all_addresses.push_back(address); - auto pool = ConnectionPoolFactory::instance().get( static_cast(settings.distributed_connections_pool_size), address.host_name, diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index de10a445d01..b2bc03dd74d 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -144,12 +144,6 @@ public: UInt32 shard_index_ = 0, UInt32 replica_index_ = 0); - Address( - const String & host_port_, - const ClusterConnectionParameters & params, - UInt32 shard_index_, - UInt32 replica_index_); - Address( const DatabaseReplicaInfo & info, const ClusterConnectionParameters & params, @@ -223,7 +217,6 @@ public: UInt32 shard_num = 0; UInt32 weight = 1; Addresses local_addresses; - Addresses all_addresses; /// nullptr if there are no remote addresses ConnectionPoolWithFailoverPtr pool; /// Connection pool for each replica, contains nullptr for local replicas @@ -279,6 +272,8 @@ public: /// Are distributed DDL Queries (ON CLUSTER Clause) allowed for this cluster bool areDistributedDDLQueriesAllowed() const { return allow_distributed_ddl_queries; } + const String & getName() const { return name; } + private: SlotToShard slot_to_shard; diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index 553488edf50..a5e861f29a3 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -7,13 +7,15 @@ #include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include 
+#include +#include #include @@ -31,9 +33,15 @@ namespace DB namespace ErrorCodes { + extern const int KEEPER_EXCEPTION; extern const int LOGICAL_ERROR; } +namespace FailPoints +{ + extern const char cluster_discovery_faults[]; +} + namespace { @@ -347,6 +355,19 @@ void ClusterDiscovery::registerInZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & inf void ClusterDiscovery::initialUpdate() { + LOG_DEBUG(log, "Initializing"); + + fiu_do_on(FailPoints::cluster_discovery_faults, + { + constexpr UInt8 success_chance = 4; + static size_t fail_count = 0; + fail_count++; + /// strict limit on fail count to avoid flaky tests + auto is_failed = fail_count < success_chance && std::uniform_int_distribution<>(0, success_chance)(thread_local_rng) != 0; + if (is_failed) + throw Exception(ErrorCodes::KEEPER_EXCEPTION, "Failpoint cluster_discovery_faults is triggered"); + }); + auto zk = context->getZooKeeper(); for (auto & [_, info] : clusters_info) { @@ -357,6 +378,8 @@ void ClusterDiscovery::initialUpdate() clusters_to_update->set(info.name); } } + LOG_DEBUG(log, "Initialized"); + is_initialized = true; } void ClusterDiscovery::start() @@ -414,6 +437,10 @@ bool ClusterDiscovery::runMainThread(std::function up_to_date_callback) using namespace std::chrono_literals; constexpr auto force_update_interval = 2min; + + if (!is_initialized) + initialUpdate(); + bool finished = false; while (!finished) { diff --git a/src/Interpreters/ClusterDiscovery.h b/src/Interpreters/ClusterDiscovery.h index 140e3691c03..6547840845e 100644 --- a/src/Interpreters/ClusterDiscovery.h +++ b/src/Interpreters/ClusterDiscovery.h @@ -137,6 +137,7 @@ private: mutable std::mutex mutex; std::unordered_map cluster_impls; + bool is_initialized = false; ThreadFromGlobalPool main_thread; Poco::Logger * log; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0cf3f360994..c444e1407c6 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -35,6 +36,11 @@ namespace ErrorCodes extern const int ALL_REPLICAS_ARE_STALE; } +namespace FailPoints +{ + extern const char use_delayed_remote_source[]; +} + namespace ClusterProxy { @@ -117,13 +123,14 @@ void SelectStreamFactory::createForShard( auto emplace_local_stream = [&]() { local_plans.emplace_back(createLocalPlan( - query_ast, header, context, processed_stage, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr)); + query_ast, header, context, processed_stage, shard_info.shard_num, shard_count)); }; auto emplace_remote_stream = [&](bool lazy = false, time_t local_delay = 0) { remote_shards.emplace_back(Shard{ .query = query_ast, + .main_table = main_table, .header = header, .shard_info = shard_info, .lazy = lazy, @@ -133,6 +140,12 @@ void SelectStreamFactory::createForShard( const auto & settings = context->getSettingsRef(); + fiu_do_on(FailPoints::use_delayed_remote_source, + { + emplace_remote_stream(/*lazy=*/true, /*local_delay=*/999999); + return; + }); + if (settings.prefer_localhost_replica && shard_info.isLocal()) { StoragePtr main_table_storage; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 030c0b77dd5..ca07fd5deda 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -50,6 +50,8 
@@ public: { /// Query and header may be changed depending on shard. ASTPtr query; + /// Used to check the table existence on remote node + StorageID main_table; Block header; Cluster::ShardInfo shard_info; @@ -58,9 +60,6 @@ public: /// (When there is a local replica with big delay). bool lazy = false; time_t local_delay = 0; - - /// Set only if parallel reading from replicas is used. - std::shared_ptr coordinator; }; using Shards = std::vector; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3dea52faf46..0890801062e 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -29,13 +29,17 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; - extern const int SUPPORT_IS_DISABLED; } namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log) +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + ASTPtr additional_filter_ast, + Poco::Logger * log) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -43,7 +47,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied. - if (cluster.getSecret().empty()) + if (!interserver_mode) { /// Does not matter on remote servers, because queries are sent under different user. new_settings.max_concurrent_queries_for_user = 0; @@ -111,11 +115,11 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c /// /// Here we don't try to analyze setting again. In case if query_info->additional_filter_ast is not empty, some filter was applied. /// It's just easier to add this filter for a source table. 
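That filter now arrives as a plain ASTPtr argument instead of being dug out of SelectQueryInfo, and the caller likewise pre-computes whether the connection runs in interserver (shared-secret) mode. A sketch of an adapted call site, modelled on the executeQuery() hunk further down (the surrounding variable names are illustrative):

```
/// `cluster`, `query_info`, `main_table` and `log` stand for whatever the caller already has in scope.
auto new_context = DB::ClusterProxy::updateSettingsForCluster(
    /* interserver_mode = */ !cluster->getSecret().empty(),
    context,
    context->getSettingsRef(),
    main_table,
    /* additional_filter_ast = */ query_info.additional_filter_ast,
    log);
```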
- if (query_info && query_info->additional_filter_ast) + if (additional_filter_ast) { Tuple tuple; tuple.push_back(main_table.getShortName()); - tuple.push_back(queryToString(query_info->additional_filter_ast)); + tuple.push_back(queryToString(additional_filter_ast)); new_settings.additional_table_filters.value.push_back(std::move(tuple)); } @@ -170,17 +174,16 @@ void executeQuery( std::vector plans; SelectStreamFactory::Shards remote_shards; - auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log); + auto new_context = updateSettingsForCluster(!not_optimized_cluster->getSecret().empty(), context, settings, + main_table, query_info.additional_filter_ast, log); new_context->increaseDistributedDepth(); size_t shards = query_info.getCluster()->getShardCount(); for (const auto & shard_info : query_info.getCluster()->getShardsInfo()) { - ASTPtr query_ast_for_shard; - if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + ASTPtr query_ast_for_shard = query_ast->clone(); + if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) { - query_ast_for_shard = query_ast->clone(); - OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, sharding_key_expr->getSampleBlock().getByPosition(0).type, @@ -191,8 +194,6 @@ void executeQuery( OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); visitor.visit(query_ast_for_shard); } - else - query_ast_for_shard = query_ast->clone(); if (shard_filter_generator) { @@ -234,7 +235,8 @@ void executeQuery( std::move(external_tables), log, shards, - query_info.storage_limits); + query_info.storage_limits, + query_info.getCluster()->getName()); read_from_remote->setStepDescription("Read from remote replica"); plan->addStep(std::move(read_from_remote)); @@ -264,64 +266,75 @@ void executeQuery( void executeQueryWithParallelReplicas( QueryPlan & query_plan, const StorageID & main_table, - const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, - const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, + const ASTPtr & query_ast, + ContextPtr context, + std::shared_ptr storage_limits, const ClusterPtr & not_optimized_cluster) { - if (not_optimized_cluster->getShardsInfo().size() != 1) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Cluster for parallel replicas should consist only from one shard"); - - auto shard_info = not_optimized_cluster->getShardsInfo().front(); - const auto & settings = context->getSettingsRef(); - ClusterPtr new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings); - - auto all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); - auto coordinator = std::make_shared(all_replicas_count); - auto remote_plan = std::make_unique(); - auto plans = std::vector(); - - /// This is a little bit weird, but we construct an "empty" coordinator without - /// any specified reading/coordination method (like Default, InOrder, InReverseOrder) - /// Because we will understand it later during QueryPlan optimization - /// So we place a reference to the coordinator to some common plane like QueryInfo - /// to then tell it about the reading method we chose. - query_info.coordinator = coordinator; - auto new_context = Context::createCopy(context); auto scalars = new_context->hasQueryContext() ? 
new_context->getQueryContext()->getScalars() : Scalars{}; - auto external_tables = new_context->getExternalTables(); + UInt64 shard_num = 0; /// shard_num is 1-based, so 0 - no shard specified + const auto it = scalars.find("_shard_num"); + if (it != scalars.end()) + { + const Block & block = it->second; + const auto & column = block.safeGetByPosition(0).column; + shard_num = column->getUInt(0); + } + + size_t all_replicas_count = 0; + ClusterPtr new_cluster; + /// if got valid shard_num from query initiator, then parallel replicas scope is the specified shard + /// shards are numbered in order of appearance in the cluster config + if (shard_num > 0) + { + const auto shard_count = not_optimized_cluster->getShardCount(); + if (shard_num > shard_count) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Shard number is greater than shard count: shard_num={} shard_count={} cluster={}", + shard_num, + shard_count, + not_optimized_cluster->getName()); + + chassert(shard_count == not_optimized_cluster->getShardsAddresses().size()); + + LOG_DEBUG(&Poco::Logger::get("executeQueryWithParallelReplicas"), "Parallel replicas query in shard scope: shard_num={} cluster={}", + shard_num, not_optimized_cluster->getName()); + + // get cluster for shard specified by shard_num + // shard_num is 1-based, but getClusterWithSingleShard expects 0-based index + auto single_shard_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1); + // convert cluster to representation expected by parallel replicas + new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings); + } + else + { + new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings); + } + + all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); + + auto coordinator = std::make_shared(all_replicas_count); + auto external_tables = new_context->getExternalTables(); auto read_from_remote = std::make_unique( query_ast, new_cluster, - coordinator, + std::move(coordinator), stream_factory.header, stream_factory.processed_stage, main_table, - table_func_ptr, new_context, getThrottler(new_context), std::move(scalars), std::move(external_tables), &Poco::Logger::get("ReadFromParallelRemoteReplicasStep"), - query_info.storage_limits); + std::move(storage_limits)); - remote_plan->addStep(std::move(read_from_remote)); - remote_plan->addInterpreterContext(context); - plans.emplace_back(std::move(remote_plan)); - - if (std::all_of(plans.begin(), plans.end(), [](const QueryPlanPtr & plan) { return !plan; })) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No plans were generated for reading from shard. 
This is a bug"); - - DataStreams input_streams; - input_streams.reserve(plans.size()); - for (const auto & plan : plans) - input_streams.emplace_back(plan->getCurrentDataStream()); - - auto union_step = std::make_unique(std::move(input_streams)); - query_plan.unitePlans(std::move(union_step), std::move(plans)); + query_plan.addStep(std::move(read_from_remote)); } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 41f6da55686..5a88f5a5cb1 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -20,6 +20,9 @@ using ExpressionActionsPtr = std::shared_ptr; struct StorageID; +struct StorageLimits; +using StorageLimitsList = std::list; + namespace ClusterProxy { @@ -34,8 +37,12 @@ class SelectStreamFactory; /// - optimize_skip_unused_shards_nesting /// /// @return new Context with adjusted settings -ContextMutablePtr updateSettingsForCluster( - const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr); +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + ASTPtr additional_filter_ast = nullptr, + Poco::Logger * log = nullptr); using AdditionalShardFilterGenerator = std::function; /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. @@ -58,11 +65,10 @@ void executeQuery( void executeQueryWithParallelReplicas( QueryPlan & query_plan, const StorageID & main_table, - const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, const ASTPtr & query_ast, ContextPtr context, - const SelectQueryInfo & query_info, + std::shared_ptr storage_limits, const ClusterPtr & not_optimized_cluster); } diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index fd6fc27faec..5dcd699d795 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -38,15 +38,15 @@ bool isRightIdentifier(JoinIdentifierPos pos) } -void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos) +void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, bool null_safe_comparison) { ASTPtr left = left_ast->clone(); ASTPtr right = right_ast->clone(); if (isLeftIdentifier(table_pos.first) && isRightIdentifier(table_pos.second)) - analyzed_join.addOnKeys(left, right); + analyzed_join.addOnKeys(left, right, null_safe_comparison); else if (isRightIdentifier(table_pos.first) && isLeftIdentifier(table_pos.second)) - analyzed_join.addOnKeys(right, left); + analyzed_join.addOnKeys(right, left, null_safe_comparison); else throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot detect left and right JOIN keys. 
JOIN ON section is ambiguous."); } @@ -78,7 +78,7 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() { if (!asof_left_key || !asof_right_key) throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "No inequality in ASOF JOIN ON section."); - addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right}); + addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right}, false); } void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data) @@ -96,14 +96,14 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASOFJoinInequality inequality = getASOFJoinInequality(func.name); - if (func.name == "equals" || inequality != ASOFJoinInequality::None) + if (func.name == "equals" || func.name == "isNotDistinctFrom" || inequality != ASOFJoinInequality::None) { if (func.arguments->children.size() != 2) throw Exception(ErrorCodes::SYNTAX_ERROR, "Function {} takes two arguments, got '{}' instead", func.name, func.formatForErrorMessage()); } - if (func.name == "equals") + if (func.name == "equals" || func.name == "isNotDistinctFrom") { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); @@ -121,7 +121,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if ((isLeftIdentifier(table_numbers.first) && isRightIdentifier(table_numbers.second)) || (isRightIdentifier(table_numbers.first) && isLeftIdentifier(table_numbers.second))) { - data.addJoinKeys(left, right, table_numbers); + bool null_safe_comparison = func.name == "isNotDistinctFrom"; + data.addJoinKeys(left, right, table_numbers, null_safe_comparison); return; } } diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 4f4e886099e..194ec01bcd5 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -54,7 +54,7 @@ public: ASTPtr asof_left_key{}; ASTPtr asof_right_key{}; - void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos); + void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, bool null_safe_comparison); void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, const ASOFJoinInequality & asof_inequality); void asofToJoinKeys(); diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 70543227b58..4fd90dad371 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -118,7 +118,7 @@ private: { size_t operator() (const IAST::Hash & hash) const { - return hash.first; + return hash.low64; } }; diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 1283879971d..3052c688e5f 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -33,9 +32,16 @@ class ConcurrentHashJoin : public IJoin { public: - explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false); + explicit ConcurrentHashJoin( + ContextPtr context_, + std::shared_ptr table_join_, + size_t slots_, + const Block & right_sample_block, + bool any_take_last_row_ = false); + ~ConcurrentHashJoin() 
override = default; + std::string getName() const override { return "ConcurrentHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; @@ -66,7 +72,6 @@ private: IColumn::Selector selectDispatchBlock(const Strings & key_columns_names, const Block & from_block); Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block); - }; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cc77e0fe723..8695669a7de 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2,14 +2,10 @@ #include #include #include -#include #include -#include #include #include -#include #include -#include #include #include #include @@ -17,11 +13,10 @@ #include #include #include -#include #include #include #include -#include +#include #include #include #include @@ -33,8 +28,6 @@ #include #include #include -#include -#include #include #include #include @@ -43,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -68,13 +60,10 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include #include @@ -85,8 +74,6 @@ #include #include #include -#include -#include #include #include #include @@ -96,16 +83,13 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include -#include #include #include #include @@ -114,11 +98,10 @@ #include #include #include -#include +#include +#include +#include -#if USE_ROCKSDB -#include -#endif namespace fs = std::filesystem; @@ -154,13 +137,13 @@ namespace CurrentMetrics extern const Metric IOWriterThreadsActive; } + namespace DB { namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int BAD_GET; extern const int UNKNOWN_DATABASE; extern const int UNKNOWN_TABLE; extern const int TABLE_ALREADY_EXISTS; @@ -175,6 +158,7 @@ namespace ErrorCodes extern const int UNKNOWN_FUNCTION; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int CLUSTER_DOESNT_EXIST; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -228,6 +212,7 @@ struct ContextSharedPart : boost::noncopyable String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries. String user_scripts_path; /// Path to the directory with user provided scripts. + String filesystem_caches_path; /// Path to the directory with filesystem caches. ConfigurationPtr config; /// Global configuration settings. String tmp_path; /// Path to the temporary files that occur when processing the request. @@ -258,26 +243,27 @@ struct ContextSharedPart : boost::noncopyable std::optional backups_worker; - String default_profile_name; /// Default profile name used for default values. - String system_profile_name; /// Profile used by system processes - String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying + String default_profile_name; /// Default profile name used for default values. 
+ String system_profile_name; /// Profile used by system processes + String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying std::unique_ptr access_control; mutable ResourceManagerPtr resource_manager; - mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. - mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. - mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. - mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. - mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. - mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. - mutable QueryCachePtr query_cache; /// Cache of query results. - mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. - ProcessList process_list; /// Executing queries at the moment. + mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. + mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. + mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. + mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. + mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. + mutable QueryCachePtr query_cache; /// Cache of query results. + mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. + mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. + ProcessList process_list; /// Executing queries at the moment. + SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; - MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) - MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) + MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) + MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) ReplicatedFetchList replicated_fetch_list; - ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. - InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. + ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. + InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. mutable std::unique_ptr buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) @@ -357,12 +343,10 @@ struct ContextSharedPart : boost::noncopyable Context::ConfigReloadCallback config_reload_callback; - bool is_server_completely_started = false; + Context::StartStopServersCallback start_servers_callback; + Context::StartStopServersCallback stop_servers_callback; -#if USE_ROCKSDB - /// Global merge tree metadata cache, stored in rocksdb. 
- MergeTreeMetadataCachePtr merge_tree_metadata_cache; -#endif + bool is_server_completely_started = false; ContextSharedPart() : access_control(std::make_unique()) @@ -487,6 +471,9 @@ struct ContextSharedPart : boost::noncopyable return; shutdown_called = true; + /// Need to flush the async insert queue before shutting down the database catalog + async_insert_queue.reset(); + /// Stop periodic reloading of the configuration files. /// This must be done first because otherwise the reloading may pass a changed config /// to some destroyed parts of ContextSharedPart. @@ -556,7 +543,7 @@ struct ContextSharedPart : boost::noncopyable */ #if USE_EMBEDDED_COMPILER if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) - cache->reset(); + cache->clear(); #endif /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). @@ -590,15 +577,6 @@ struct ContextSharedPart : boost::noncopyable trace_collector.reset(); /// Stop zookeeper connection zookeeper.reset(); - -#if USE_ROCKSDB - /// Shutdown merge tree metadata cache - if (merge_tree_metadata_cache) - { - merge_tree_metadata_cache->shutdown(); - merge_tree_metadata_cache.reset(); - } -#endif } /// Can be removed without context lock @@ -732,6 +710,9 @@ std::unique_lock Context::getLock() const ProcessList & Context::getProcessList() { return shared->process_list; } const ProcessList & Context::getProcessList() const { return shared->process_list; } OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; } + +SessionTracker & Context::getSessionTracker() { return shared->session_tracker; } + MergeList & Context::getMergeList() { return shared->merge_list; } const MergeList & Context::getMergeList() const { return shared->merge_list; } MovesList & Context::getMovesList() { return shared->moves_list; } @@ -777,6 +758,12 @@ String Context::getUserScriptsPath() const return shared->user_scripts_path; } +String Context::getFilesystemCachesPath() const +{ + auto lock = getLock(); + return shared->filesystem_caches_path; +} + Strings Context::getWarnings() const { Strings common_warnings; @@ -868,6 +855,16 @@ void Context::setPath(const String & path) shared->user_scripts_path = shared->path + "user_scripts/"; } +void Context::setFilesystemCachesPath(const String & path) +{ + auto lock = getLock(); + + if (!fs::path(path).is_absolute()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem caches path must be absolute: {}", path); + + shared->filesystem_caches_path = path; +} + static void setupTmpPath(Poco::Logger * log, const std::string & path) try { @@ -985,7 +982,7 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t auto file_cache = FileCacheFactory::instance().getByName(disk_ptr->getCacheName()).cache; if (!file_cache) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Cache '{}' is not found", file_cache->getBasePath()); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Cache '{}' is not found", disk_ptr->getCacheName()); LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath()); @@ -1076,52 +1073,32 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool set_current_roles_, bool set_current_database_) +void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) { /// Prepare lists of user's profiles, constraints, 
settings, roles. + /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, + /// so Context::getLock() must be unlocked while we're doing this. - std::shared_ptr user; - std::shared_ptr temp_access; - if (set_current_profiles_ || set_current_roles_ || set_current_database_) - { - std::optional params; - { - auto lock = getLock(); - params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info}); - } - /// `temp_access` is used here only to extract information about the user, not to actually check access. - /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. - temp_access = getAccessControl().getContextAccess(*params); - user = temp_access->getUser(); - } + auto user = getAccessControl().read(user_id_); - std::shared_ptr profiles; - if (set_current_profiles_) - profiles = temp_access->getDefaultProfileInfo(); - - std::optional> roles; - if (set_current_roles_) - roles = user->granted_roles.findGranted(user->default_roles); - - String database; - if (set_current_database_) - database = user->default_database; + auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = getAccessControl().getEnabledRolesInfo(new_current_roles, {}); + auto enabled_profiles = getAccessControl().getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); + const auto & database = user->default_database; /// Apply user's profiles, constraints, settings, roles. + auto lock = getLock(); setUserID(user_id_); - if (profiles) - { - /// A profile can specify a value and a readonly constraint for same setting at the same time, - /// so we shouldn't check constraints here. - setCurrentProfiles(*profiles, /* check_constraints= */ false); - } + /// A profile can specify a value and a readonly constraint for same setting at the same time, + /// so we shouldn't check constraints here. + setCurrentProfiles(*enabled_profiles, /* check_constraints= */ false); - if (roles) - setCurrentRoles(*roles); + setCurrentRoles(new_current_roles); + /// It's optional to specify the DEFAULT DATABASE in the user's definition. 
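The rewritten flow above resolves the user, roles and settings profiles through AccessControl before the context lock is taken, so the potentially blocking reads happen unlocked and only the final application is serialized. Callers either pass an explicit role set or fall back to the user's default roles; a hedged sketch (the UUID variables are placeholders):

```
#include <Interpreters/Context.h>
#include <Core/UUID.h>
#include <vector>

/// Illustrative only: apply a user with default roles, then with an explicit subset.
void applyUserSketch(DB::ContextMutablePtr session_context, const DB::UUID & user_id, const DB::UUID & analyst_role)
{
    /// No role list given: the user's granted default roles are used.
    session_context->setUser(user_id);

    /// Explicit list: only roles actually granted to the user survive (findGranted() filters the rest).
    session_context->setUser(user_id, std::vector<DB::UUID>{analyst_role});
}
```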
if (!database.empty()) setCurrentDatabase(database); } @@ -1150,13 +1127,6 @@ std::optional Context::getUserID() const } -void Context::setQuotaKey(String quota_key_) -{ - auto lock = getLock(); - client_info.quota_key = std::move(quota_key_); -} - - void Context::setCurrentRoles(const std::vector & current_roles_) { auto lock = getLock(); @@ -1296,7 +1266,7 @@ void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, boo { auto lock = getLock(); if (check_constraints) - checkSettingsConstraints(profiles_info.settings); + checkSettingsConstraints(profiles_info.settings, SettingSource::PROFILE); applySettingsChanges(profiles_info.settings); settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); } @@ -1322,10 +1292,12 @@ ResourceManagerPtr Context::getResourceManager() const return shared->resource_manager; } -ClassifierPtr Context::getClassifier() const +ClassifierPtr Context::getWorkloadClassifier() const { auto lock = getLock(); - return getResourceManager()->acquire(getSettingsRef().workload); + if (!classifier) + classifier = getResourceManager()->acquire(getSettingsRef().workload); + return classifier; } @@ -1576,12 +1548,25 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const { if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { + auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + + ASTCreateQuery create; + create.select = query->as(); + auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, getQueryContext()); + auto res = std::make_shared(StorageID(database_name, table_name), + create, + ColumnsDescription(sample_block.getNamesAndTypesList()), + /* comment */ "", + /* is_parameterized_view */ true); + res->startup(); function->prefer_subquery_to_function_formatting = true; - return table; + return res; } } auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + auto key = toString(hash); StoragePtr & res = table_function_results[key]; if (!res) { @@ -1699,9 +1684,9 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const { /// For input function we should check if input format supports reading subset of columns. 
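Both capability checks below now receive the query context as well, presumably so that format- and settings-dependent behaviour can be resolved per query rather than globally (the rationale is an assumption; the call shapes are from this hunk). Condensed into a small helper with placeholder naming:

```
#include <Formats/FormatFactory.h>
#include <Interpreters/Context.h>
#include <TableFunctions/ITableFunction.h>

/// Condensed form of the decision made below; `context` plays the role of shared_from_this().
static bool canUseSubsetOfInsertColumns(const DB::TableFunctionPtr & table_function, DB::ContextPtr context)
{
    /// The "input" table function reads whatever the INSERT supplies, so ask the input format;
    /// every other table function answers for itself.
    if (table_function->getName() == "input")
        return DB::FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(context->getInsertFormat(), context);
    return table_function->supportsReadingSubsetOfColumns(context);
}
```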
if (table_function_ptr->getName() == "input") - use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat()); + use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat(), shared_from_this()); else - use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(); + use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(shared_from_this()); } if (use_columns_from_insert_query) @@ -1732,7 +1717,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const auto new_hash = table_expression->getTreeHash(); if (hash != new_hash) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); + key = toString(new_hash); table_function_results[key] = res; } } @@ -1741,8 +1726,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = table_expression->getTreeHash(); + const auto key = toString(hash); StoragePtr & res = table_function_results[key]; if (!res) @@ -1837,29 +1822,29 @@ void Context::applySettingsChanges(const SettingsChanges & changes) } -void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const +void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements, source); } -void Context::checkSettingsConstraints(const SettingChange & change) const +void Context::checkSettingsConstraints(const SettingChange & change, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change, source); } -void Context::checkSettingsConstraints(const SettingsChanges & changes) const +void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::checkSettingsConstraints(SettingsChanges & changes) const +void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::clampToSettingsConstraints(SettingsChanges & changes) const +void Context::clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes, source); } void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const @@ -2271,16 +2256,26 @@ QueryStatusPtr Context::getProcessListElement() const } -void Context::setUncompressedCache(const String & uncompressed_cache_policy, size_t 
max_size_in_bytes) +void Context::setUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) { auto lock = getLock(); if (shared->uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache has been already created."); - shared->uncompressed_cache = std::make_shared(uncompressed_cache_policy, max_size_in_bytes); + shared->uncompressed_cache = std::make_shared(cache_policy, max_size_in_bytes, size_ratio); } +void Context::updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); + shared->uncompressed_cache->setMaxSizeInBytes(max_size_in_bytes); +} UncompressedCachePtr Context::getUncompressedCache() const { @@ -2288,23 +2283,33 @@ UncompressedCachePtr Context::getUncompressedCache() const return shared->uncompressed_cache; } - -void Context::dropUncompressedCache() const +void Context::clearUncompressedCache() const { auto lock = getLock(); + if (shared->uncompressed_cache) - shared->uncompressed_cache->reset(); + shared->uncompressed_cache->clear(); } - -void Context::setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes) +void Context::setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) { auto lock = getLock(); if (shared->mark_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache has been already created."); - shared->mark_cache = std::make_shared(mark_cache_policy, cache_size_in_bytes); + shared->mark_cache = std::make_shared(cache_policy, max_cache_size_in_bytes, size_ratio); +} + +void Context::updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + shared->mark_cache->setMaxSizeInBytes(max_size_in_bytes); } MarkCachePtr Context::getMarkCache() const @@ -2313,11 +2318,12 @@ MarkCachePtr Context::getMarkCache() const return shared->mark_cache; } -void Context::dropMarkCache() const +void Context::clearMarkCache() const { auto lock = getLock(); + if (shared->mark_cache) - shared->mark_cache->reset(); + shared->mark_cache->clear(); } ThreadPool & Context::getLoadMarksThreadpool() const @@ -2335,15 +2341,174 @@ ThreadPool & Context::getLoadMarksThreadpool() const return *shared->load_marks_threadpool; } -static size_t getPrefetchThreadpoolSizeFromConfig(const Poco::Util::AbstractConfiguration & config) +void Context::setIndexUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) { - return config.getUInt(".prefetch_threadpool_pool_size", 100); + auto lock = getLock(); + + if (shared->index_uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); + + shared->index_uncompressed_cache = std::make_shared(cache_policy, max_size_in_bytes, size_ratio); } -size_t Context::getPrefetchThreadpoolSize() const +void Context::updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - const auto & config = getConfigRef(); - return getPrefetchThreadpoolSizeFromConfig(config); + auto lock = 
getLock(); + + if (!shared->index_uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); + shared->index_uncompressed_cache->setMaxSizeInBytes(max_size_in_bytes); +} + +UncompressedCachePtr Context::getIndexUncompressedCache() const +{ + auto lock = getLock(); + return shared->index_uncompressed_cache; +} + +void Context::clearIndexUncompressedCache() const +{ + auto lock = getLock(); + + if (shared->index_uncompressed_cache) + shared->index_uncompressed_cache->clear(); +} + +void Context::setIndexMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) +{ + auto lock = getLock(); + + if (shared->index_mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); + + shared->index_mark_cache = std::make_shared(cache_policy, max_cache_size_in_bytes, size_ratio); +} + +void Context::updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->index_mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); + shared->index_mark_cache->setMaxSizeInBytes(max_size_in_bytes); +} + +MarkCachePtr Context::getIndexMarkCache() const +{ + auto lock = getLock(); + return shared->index_mark_cache; +} + +void Context::clearIndexMarkCache() const +{ + auto lock = getLock(); + + if (shared->index_mark_cache) + shared->index_mark_cache->clear(); +} + +void Context::setMMappedFileCache(size_t max_cache_size_in_num_entries) +{ + auto lock = getLock(); + + if (shared->mmap_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); + + shared->mmap_cache = std::make_shared(max_cache_size_in_num_entries); +} + +void Context::updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->mmap_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + shared->mmap_cache->setMaxSizeInBytes(max_size_in_bytes); +} + +MMappedFileCachePtr Context::getMMappedFileCache() const +{ + auto lock = getLock(); + return shared->mmap_cache; +} + +void Context::clearMMappedFileCache() const +{ + auto lock = getLock(); + + if (shared->mmap_cache) + shared->mmap_cache->clear(); +} + +void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows) +{ + auto lock = getLock(); + + if (shared->query_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); + + shared->query_cache = std::make_shared(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows); +} + +void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->query_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE); + size_t max_entries = config.getUInt64("query_cache.max_entries", 
DEFAULT_QUERY_CACHE_MAX_ENTRIES); + size_t max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES); + size_t max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS); + shared->query_cache->updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows); +} + +QueryCachePtr Context::getQueryCache() const +{ + auto lock = getLock(); + return shared->query_cache; +} + +void Context::clearQueryCache() const +{ + auto lock = getLock(); + + if (shared->query_cache) + shared->query_cache->clear(); +} + +void Context::clearCaches() const +{ + auto lock = getLock(); + + if (!shared->uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet."); + shared->uncompressed_cache->clear(); + + if (!shared->mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); + shared->mark_cache->clear(); + + if (!shared->index_uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet."); + shared->index_uncompressed_cache->clear(); + + if (!shared->index_mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet."); + shared->index_mark_cache->clear(); + + if (!shared->mmap_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mmapped file cache was not created yet."); + shared->mmap_cache->clear(); + + /// Intentionally not clearing the query cache which is transactionally inconsistent by design. } ThreadPool & Context::getPrefetchThreadpool() const @@ -2361,131 +2526,10 @@ ThreadPool & Context::getPrefetchThreadpool() const return *shared->prefetch_threadpool; } -void Context::setIndexUncompressedCache(size_t max_size_in_bytes) +size_t Context::getPrefetchThreadpoolSize() const { - auto lock = getLock(); - - if (shared->index_uncompressed_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); - - shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes); -} - - -UncompressedCachePtr Context::getIndexUncompressedCache() const -{ - auto lock = getLock(); - return shared->index_uncompressed_cache; -} - - -void Context::dropIndexUncompressedCache() const -{ - auto lock = getLock(); - if (shared->index_uncompressed_cache) - shared->index_uncompressed_cache->reset(); -} - - -void Context::setIndexMarkCache(size_t cache_size_in_bytes) -{ - auto lock = getLock(); - - if (shared->index_mark_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); - - shared->index_mark_cache = std::make_shared(cache_size_in_bytes); -} - -MarkCachePtr Context::getIndexMarkCache() const -{ - auto lock = getLock(); - return shared->index_mark_cache; -} - -void Context::dropIndexMarkCache() const -{ - auto lock = getLock(); - if (shared->index_mark_cache) - shared->index_mark_cache->reset(); -} - -void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = getLock(); - - if (shared->query_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); - - shared->query_cache = std::make_shared(); - shared->query_cache->updateConfiguration(config); -} - -void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = getLock(); - if (shared->query_cache) - 
shared->query_cache->updateConfiguration(config); -} - -QueryCachePtr Context::getQueryCache() const -{ - auto lock = getLock(); - return shared->query_cache; -} - -void Context::dropQueryCache() const -{ - auto lock = getLock(); - if (shared->query_cache) - shared->query_cache->reset(); -} - -void Context::setMMappedFileCache(size_t cache_size_in_num_entries) -{ - auto lock = getLock(); - - if (shared->mmap_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); - - shared->mmap_cache = std::make_shared(cache_size_in_num_entries); -} - -MMappedFileCachePtr Context::getMMappedFileCache() const -{ - auto lock = getLock(); - return shared->mmap_cache; -} - -void Context::dropMMappedFileCache() const -{ - auto lock = getLock(); - if (shared->mmap_cache) - shared->mmap_cache->reset(); -} - - -void Context::dropCaches() const -{ - auto lock = getLock(); - - if (shared->uncompressed_cache) - shared->uncompressed_cache->reset(); - - if (shared->mark_cache) - shared->mark_cache->reset(); - - if (shared->index_uncompressed_cache) - shared->index_uncompressed_cache->reset(); - - if (shared->index_mark_cache) - shared->index_mark_cache->reset(); - - if (shared->query_cache) - shared->query_cache->reset(); - - if (shared->mmap_cache) - shared->mmap_cache->reset(); + const auto & config = getConfigRef(); + return config.getUInt(".prefetch_threadpool_pool_size", 100); } BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const @@ -2691,11 +2735,16 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const const auto & config = shared->zookeeper_config ? *shared->zookeeper_config : getConfigRef(); if (!shared->zookeeper) shared->zookeeper = std::make_shared(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); - else if (shared->zookeeper->expired()) + else if (shared->zookeeper->hasReachedDeadline()) + shared->zookeeper->finalize("ZooKeeper session has reached its deadline"); + + if (shared->zookeeper->expired()) { Stopwatch watch; LOG_DEBUG(shared->log, "Trying to establish a new connection with ZooKeeper"); shared->zookeeper = shared->zookeeper->startNewSession(); + if (isServerCompletelyStarted()) + shared->zookeeper->setServerCompletelyStarted(); LOG_DEBUG(shared->log, "Establishing a new connection with ZooKeeper took {} ms", watch.elapsedMilliseconds()); } @@ -2898,23 +2947,6 @@ std::map Context::getAuxiliaryZooKeepers() const return shared->auxiliary_zookeepers; } -#if USE_ROCKSDB -MergeTreeMetadataCachePtr Context::getMergeTreeMetadataCache() const -{ - auto cache = tryGetMergeTreeMetadataCache(); - if (!cache) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Merge tree metadata cache is not initialized, please add config merge_tree_metadata_cache in config.xml and restart"); - return cache; -} - -MergeTreeMetadataCachePtr Context::tryGetMergeTreeMetadataCache() const -{ - return shared->merge_tree_metadata_cache; -} -#endif - void Context::resetZooKeeper() const { std::lock_guard lock(shared->zookeeper_mutex); @@ -3061,7 +3093,7 @@ UInt16 Context::getServerPort(const String & port_name) const { auto it = shared->server_ports.find(port_name); if (it == shared->server_ports.end()) - throw Exception(ErrorCodes::BAD_GET, "There is no port named {}", port_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "There is no port named {}", port_name); else return it->second; } @@ -3070,7 +3102,7 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c { if (auto res = tryGetCluster(cluster_name)) return res; - 
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); } @@ -3190,7 +3222,12 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrsystem_logs = std::make_unique(getGlobalContext(), getConfigRef()); } @@ -3199,12 +3236,11 @@ void Context::initializeTraceCollector() shared->initializeTraceCollector(getTraceLog()); } -#if USE_ROCKSDB -void Context::initializeMergeTreeMetadataCache(const String & dir, size_t size) +/// Call after unexpected crash happen. +void Context::handleCrash() const { - shared->merge_tree_metadata_cache = MergeTreeMetadataCache::create(dir, size); + shared->system_logs->handleCrash(); } -#endif bool Context::hasTraceCollector() const { @@ -3385,6 +3421,26 @@ std::shared_ptr Context::getAsynchronousInsertLog() const return shared->system_logs->asynchronous_insert_log; } +std::shared_ptr Context::getBackupLog() const +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->backup_log; +} + +std::vector Context::getSystemLogs() const +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->logs; +} + CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { auto lock = getLock(); @@ -3688,6 +3744,36 @@ void Context::reloadConfig() const shared->config_reload_callback(); } +void Context::setStartServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. + shared->start_servers_callback = std::move(callback); +} + +void Context::setStopServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. + shared->stop_servers_callback = std::move(callback); +} + +void Context::startServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->start_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set."); + + shared->start_servers_callback(server_type); +} + +void Context::stopServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. 
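startServers()/stopServers() only forward to callbacks that the server binary is expected to install once during startup, which is why no locking is needed here. The wiring below is a hypothetical sketch: the helper functions are placeholders, and only the Context methods and the ServerType parameter come from this patch.

```
/// Hypothetical wiring at server startup; startListenersFor()/stopListenersFor() are placeholders.
global_context->setStartServersCallback([&](const DB::ServerType & server_type)
{
    startListenersFor(server_type);  /// e.g. create and start the protocol listeners matching server_type
});
global_context->setStopServersCallback([&](const DB::ServerType & server_type)
{
    stopListenersFor(server_type);   /// e.g. shut the matching listeners down
});

/// Later, e.g. from SYSTEM STOP LISTEN / SYSTEM START LISTEN handling:
global_context->stopServers(server_type);
global_context->startServers(server_type);
```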
+ if (!shared->stop_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set."); + + shared->stop_servers_callback(server_type); +} + void Context::shutdown() { @@ -4502,14 +4588,6 @@ ReadSettings Context::getReadSettings() const return res; } -ReadSettings Context::getBackupReadSettings() const -{ - ReadSettings read_settings = getReadSettings(); - read_settings.remote_throttler = getBackupsThrottler(); - read_settings.local_throttler = getBackupsThrottler(); - return read_settings; -} - WriteSettings Context::getWriteSettings() const { WriteSettings res; @@ -4582,4 +4660,9 @@ void Context::setClientProtocolVersion(UInt64 version) client_protocol_version = version; } +const ServerSettings & Context::getServerSettings() const +{ + return shared->server_settings; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index fa210f04451..b4a5b3d8c85 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,6 +1,6 @@ #pragma once -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,7 @@ struct Progress; struct FileProgress; class Clusters; class QueryCache; +class ISystemLog; class QueryLog; class QueryThreadLog; class QueryViewsLog; @@ -102,6 +104,7 @@ class ProcessorsProfileLog; class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class AsynchronousInsertLog; +class BackupLog; class IAsynchronousReader; struct MergeTreeSettings; struct InitialAllRangesAnnouncement; @@ -134,6 +137,7 @@ using StoragePolicyPtr = std::shared_ptr; using StoragePoliciesMap = std::map; class StoragePolicySelector; using StoragePolicySelectorPtr = std::shared_ptr; +class ServerType; template class MergeTreeBackgroundExecutor; @@ -193,14 +197,13 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr; class ParallelReplicasReadingCoordinator; using ParallelReplicasReadingCoordinatorPtr = std::shared_ptr; -#if USE_ROCKSDB -class MergeTreeMetadataCache; -using MergeTreeMetadataCachePtr = std::shared_ptr; -#endif - class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; +class SessionTracker; + +struct ServerSettings; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -410,6 +413,10 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; + /// Resource classifier for a query, holds smart pointers required for ResourceLink + /// NOTE: all resource links became invalid after `classifier` destruction + mutable ClassifierPtr classifier; + /// Prepared sets that can be shared between different queries. One use case is when is to share prepared sets between /// mutation tasks of one mutation executed against different parts of the same table. PreparedSetsCachePtr prepared_sets_cache; @@ -484,6 +491,7 @@ public: String getUserFilesPath() const; String getDictionariesLibPath() const; String getUserScriptsPath() const; + String getFilesystemCachesPath() const; /// A list of warnings about server configuration to place in `system.warnings` table. 
Strings getWarnings() const; @@ -494,6 +502,8 @@ public: TemporaryDataOnDiskScopePtr getSharedTempDataOnDisk() const; void setTempDataOnDisk(TemporaryDataOnDiskScopePtr temp_data_on_disk_); + void setFilesystemCachesPath(const String & path); + void setPath(const String & path); void setFlagsPath(const String & path); void setUserFilesPath(const String & path); @@ -530,16 +540,12 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_, bool set_current_profiles_ = true, bool set_current_roles_ = true, bool set_current_database_ = true); + void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); UserPtr getUser() const; - void setUserID(const UUID & user_id_); std::optional getUserID() const; - String getUserName() const; - void setQuotaKey(String quota_key_); - void setCurrentRoles(const std::vector & current_roles_); void setCurrentRolesDefault(); boost::container::flat_set getCurrentRoles() const; @@ -576,7 +582,7 @@ public: /// Resource management related ResourceManagerPtr getResourceManager() const; - ClassifierPtr getClassifier() const; + ClassifierPtr getWorkloadClassifier() const; /// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once. void setExternalTablesInitializer(ExternalTablesInitializer && initializer); @@ -734,11 +740,11 @@ public: void applySettingsChanges(const SettingsChanges & changes); /// Checks the constraints. - void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const; - void checkSettingsConstraints(const SettingChange & change) const; - void checkSettingsConstraints(const SettingsChanges & changes) const; - void checkSettingsConstraints(SettingsChanges & changes) const; - void clampToSettingsConstraints(SettingsChanges & changes) const; + void checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const; + void checkSettingsConstraints(const SettingChange & change, SettingSource source) const; + void checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const; + void checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const; + void clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const; void checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const; /// Reset settings to default value @@ -860,6 +866,8 @@ public: OvercommitTracker * getGlobalOvercommitTracker() const; + SessionTracker & getSessionTracker(); + MergeList & getMergeList(); const MergeList & getMergeList() const; @@ -887,11 +895,6 @@ public: UInt64 getClientProtocolVersion() const; void setClientProtocolVersion(UInt64 version); -#if USE_ROCKSDB - MergeTreeMetadataCachePtr getMergeTreeMetadataCache() const; - MergeTreeMetadataCachePtr tryGetMergeTreeMetadataCache() const; -#endif - #if USE_NURAFT std::shared_ptr & getKeeperDispatcher() const; std::shared_ptr & tryGetKeeperDispatcher() const; @@ -913,44 +916,38 @@ public: void setSystemZooKeeperLogAfterInitializationIfNeeded(); - /// Create a cache of uncompressed blocks of specified size. This can be done only once. 
- void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes); - std::shared_ptr getUncompressedCache() const; - void dropUncompressedCache() const; + /// --- Caches ------------------------------------------------------------------------------------------ - /// Create a cache of marks of specified size. This can be done only once. - void setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes); + void setUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio); + void updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config); + std::shared_ptr getUncompressedCache() const; + void clearUncompressedCache() const; + + void setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio); + void updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMarkCache() const; - void dropMarkCache() const; + void clearMarkCache() const; ThreadPool & getLoadMarksThreadpool() const; - ThreadPool & getPrefetchThreadpool() const; - - /// Note: prefetchThreadpool is different from threadpoolReader - /// in the way that its tasks are - wait for marks to be loaded - /// and make a prefetch by putting a read task to threadpoolReader. - size_t getPrefetchThreadpoolSize() const; - - /// Create a cache of index uncompressed blocks of specified size. This can be done only once. - void setIndexUncompressedCache(size_t max_size_in_bytes); + void setIndexUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio); + void updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getIndexUncompressedCache() const; - void dropIndexUncompressedCache() const; + void clearIndexUncompressedCache() const; - /// Create a cache of index marks of specified size. This can be done only once. - void setIndexMarkCache(size_t cache_size_in_bytes); + void setIndexMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio); + void updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getIndexMarkCache() const; - void dropIndexMarkCache() const; + void clearIndexMarkCache() const; - /// Create a cache of mapped files to avoid frequent open/map/unmap/close and to reuse from several threads. - void setMMappedFileCache(size_t cache_size_in_num_entries); + void setMMappedFileCache(size_t max_cache_size_in_num_entries); + void updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMMappedFileCache() const; - void dropMMappedFileCache() const; + void clearMMappedFileCache() const; - /// Create a cache of query results for statements which run repeatedly. - void setQueryCache(const Poco::Util::AbstractConfiguration & config); + void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows); void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getQueryCache() const; - void dropQueryCache() const; + void clearQueryCache() const; /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. 
@@ -958,7 +955,16 @@ public: * (when deleting a table - it is necessary, since in its place another can appear) * const - because the change in the cache is not considered significant. */ - void dropCaches() const; + void clearCaches() const; + + /// ----------------------------------------------------------------------------------------------------- + + ThreadPool & getPrefetchThreadpool() const; + + /// Note: prefetchThreadpool is different from threadpoolReader + /// in the way that its tasks are - wait for marks to be loaded + /// and make a prefetch by putting a read task to threadpoolReader. + size_t getPrefetchThreadpoolSize() const; /// Settings for MergeTree background tasks stored in config.xml BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const; @@ -993,9 +999,8 @@ public: /// Call after initialization before using trace collector. void initializeTraceCollector(); -#if USE_ROCKSDB - void initializeMergeTreeMetadataCache(const String & dir, size_t size); -#endif + /// Call after unexpected crash happen. + void handleCrash() const; bool hasTraceCollector() const; @@ -1015,6 +1020,9 @@ public: std::shared_ptr getFilesystemCacheLog() const; std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; + std::shared_ptr getBackupLog() const; + + std::vector getSystemLogs() const; /// Returns an object used to log operations with parts if it possible. /// Provide table name to make required checks. @@ -1057,6 +1065,13 @@ public: void setConfigReloadCallback(ConfigReloadCallback && callback); void reloadConfig() const; + using StartStopServersCallback = std::function; + void setStartServersCallback(StartStopServersCallback && callback); + void setStopServersCallback(StartStopServersCallback && callback); + + void startServers(const ServerType & server_type) const; + void stopServers(const ServerType & server_type) const; + void shutdown(); bool isInternalQuery() const { return is_internal_query; } @@ -1155,9 +1170,6 @@ public: /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; - /** Get settings for reading from filesystem for BACKUPs. */ - ReadSettings getBackupReadSettings() const; - /** Get settings for writing to filesystem. */ WriteSettings getWriteSettings() const; @@ -1177,11 +1189,15 @@ public: void setPreparedSetsCache(const PreparedSetsCachePtr & cache); PreparedSetsCachePtr getPreparedSetsCache() const; + const ServerSettings & getServerSettings() const; + private: std::unique_lock getLock() const; void initGlobal(); + void setUserID(const UUID & user_id_); + template void checkAccessImpl(const Args &... args) const; diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 379c9122cc8..ec693eb7931 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -83,9 +83,6 @@ void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, co stack_trace.toStringEveryLine([&trace_full](std::string_view line) { trace_full.push_back(line); }); CrashLogElement element{static_cast(time / 1000000000), time, signal, thread_id, query_id, trace, trace_full}; - crash_log_owned->add(element); - /// Notify savingThreadFunction to start flushing crash log - /// Crash log is storing in parallel with the signal processing thread. 
- crash_log_owned->notifyFlush(true); + crash_log_owned->add(std::move(element)); } } diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h index 78794574c82..65714295be4 100644 --- a/src/Interpreters/CrashLog.h +++ b/src/Interpreters/CrashLog.h @@ -45,6 +45,11 @@ public: { crash_log = crash_log_; } + + static consteval size_t getDefaultMaxSize() { return 1024; } + static consteval size_t getDefaultReservedSize() { return 1024; } + static consteval size_t getDefaultFlushIntervalMilliseconds() { return 1000; } + static consteval size_t shouldNotifyFlushOnCrash() { return true; } }; } diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 193bb5b6ab0..8be334d6223 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL - OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__, task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER; @@ -574,7 +574,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS) { chassert(Coordination::isHardwareError(create_active_res)); - throw Coordination::Exception(create_active_res, active_node_path); + throw Coordination::Exception::fromPath(create_active_res, active_node_path); } /// Status dirs were not created in enqueueQuery(...) or someone is removing entry diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0e2e30eefee..2e82ff464d2 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +25,7 @@ #include #include +#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_MYSQL @@ -35,7 +38,6 @@ # include #endif - namespace CurrentMetrics { extern const Metric TablesToDropQueueSize; @@ -59,6 +61,29 @@ namespace ErrorCodes extern const int UNFINISHED; } +class DatabaseNameHints : public IHints<> +{ +public: + explicit DatabaseNameHints(const DatabaseCatalog & database_catalog_) + : database_catalog(database_catalog_) + { + } + Names getAllRegisteredNames() const override + { + Names result; + auto databases_list = database_catalog.getDatabases(); + for (const auto & database_name : databases_list | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; + result.emplace_back(database_name); + } + return result; + } +private: + const DatabaseCatalog & database_catalog; +}; + TemporaryTableHolder::TemporaryTableHolder(ContextPtr context_, const TemporaryTableHolder::Creator & creator, const ASTPtr & query) : WithContext(context_->getGlobalContext()) , temporary_tables(DatabaseCatalog::instance().getDatabaseForTemporaryTables().get()) @@ -313,7 +338,14 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( { assert(!db_and_table.first && !db_and_table.second); if (exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + { + TableNameHints 
hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext()); + std::vector names = hints.getHints(table_id.getTableName()); + if (names.empty()) + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs())); + else + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist. Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0]))); + } return {}; } @@ -336,7 +368,6 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return db_and_table; } - if (table_id.database_name == TEMPORARY_DATABASE) { /// For temporary tables UUIDs are set in Context::resolveStorageID(...). @@ -349,21 +380,69 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { - std::lock_guard lock{databases_mutex}; - auto it = databases.find(table_id.getDatabaseName()); - if (databases.end() == it) + // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part. + // So, fail early and gracefully... + if (!table_id.hasDatabase()) { if (exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()))); + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name")); return {}; } - database = it->second; + + std::lock_guard lock{databases_mutex}; + auto it = databases.find(table_id.getDatabaseName()); + if (databases.end() != it) + database = it->second; } - auto table = database->tryGetTable(table_id.table_name, context_); - if (!table && exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + if (!database) + { + if (exception) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(table_id.getDatabaseName()); + if (names.empty()) + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(table_id.getDatabaseName()))); + } + else + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(table_id.getDatabaseName()), backQuoteIfNeed(names[0]))); + } + } + return {}; + } + StoragePtr table; + if (exception) + { + try + { + table = database->getTable(table_id.table_name, context_); + } + catch (const Exception & e) + { + exception->emplace(e); + } + } + else + { + table = database->tryGetTable(table_id.table_name, context_); + } + + if (!table && exception && !exception->has_value()) + { + TableNameHints hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext()); + std::vector names = hints.getHints(table_id.getTableName()); + if (names.empty()) + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs())); + } + else + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist. 
Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0]))); + } + } if (!table) database = nullptr; @@ -414,8 +493,26 @@ bool DatabaseCatalog::isPredefinedTable(const StorageID & table_id) const void DatabaseCatalog::assertDatabaseExists(const String & database_name) const { - std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); + DatabasePtr db; + { + std::lock_guard lock{databases_mutex}; + assert(!database_name.empty()); + if (auto it = databases.find(database_name); it != databases.end()) + db = it->second; + } + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } } void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) const @@ -424,19 +521,11 @@ void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) co assertDatabaseDoesntExistUnlocked(database_name); } -void DatabaseCatalog::assertDatabaseExistsUnlocked(const String & database_name) const -{ - assert(!database_name.empty()); - if (databases.end() == databases.find(database_name)) - throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(database_name)); -} - - void DatabaseCatalog::assertDatabaseDoesntExistUnlocked(const String & database_name) const { assert(!database_name.empty()); if (databases.end() != databases.find(database_name)) - throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", backQuoteIfNeed(database_name)); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists", backQuoteIfNeed(database_name)); } void DatabaseCatalog::attachDatabase(const String & database_name, const DatabasePtr & database) @@ -456,18 +545,34 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri { if (database_name == TEMPORARY_DATABASE) throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot detach database with temporary tables."); - + assert(!database_name.empty()); DatabasePtr db; { std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); - db = databases.find(database_name)->second; - UUID db_uuid = db->getUUID(); - if (db_uuid != UUIDHelpers::Nil) - removeUUIDMapping(db_uuid); - databases.erase(database_name); - } + if (auto it = databases.find(database_name); it != databases.end()) + { + db = it->second; + UUID db_uuid = db->getUUID(); + if (db_uuid != UUIDHelpers::Nil) + removeUUIDMapping(db_uuid); + databases.erase(database_name); + + } + } + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. 
Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } if (check_empty) { try @@ -503,7 +608,6 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri if (db_uuid != UUIDHelpers::Nil) removeUUIDMappingFinally(db_uuid); } - return db; } @@ -529,9 +633,28 @@ void DatabaseCatalog::updateDatabaseName(const String & old_name, const String & DatabasePtr DatabaseCatalog::getDatabase(const String & database_name) const { - std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); - return databases.find(database_name)->second; + assert(!database_name.empty()); + DatabasePtr db; + { + std::lock_guard lock{databases_mutex}; + if (auto it = databases.find(database_name); it != databases.end()) + db = it->second; + } + + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } + return db; } DatabasePtr DatabaseCatalog::tryGetDatabase(const String & database_name) const @@ -774,19 +897,37 @@ DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & /// TSA does not support unique_lock auto db_guard_iter = TSA_SUPPRESS_WARNING_FOR_WRITE(ddl_guards).try_emplace(database).first; DatabaseGuard & db_guard = db_guard_iter->second; - return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table, database); + return std::make_unique(db_guard.table_guards, db_guard.database_ddl_mutex, std::move(lock), table, database); } -std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) +DatabaseCatalog::DatabaseGuard & DatabaseCatalog::getDatabaseGuard(const String & database) { DDLGuards::iterator db_guard_iter; { std::lock_guard lock(ddl_guards_mutex); db_guard_iter = ddl_guards.try_emplace(database).first; - assert(db_guard_iter->second.first.contains("")); } DatabaseGuard & db_guard = db_guard_iter->second; - return std::unique_lock{db_guard.second}; + return db_guard; +} + +std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) +{ + return std::unique_lock{getDatabaseGuard(database).database_ddl_mutex}; +} + +std::unique_lock DatabaseCatalog::getLockForDropDatabase(const String & database) +{ + return std::unique_lock{getDatabaseGuard(database).restart_replica_mutex}; +} + +std::optional> DatabaseCatalog::tryGetLockForRestartReplica(const String & database) +{ + DatabaseGuard & db_guard = getDatabaseGuard(database); + std::shared_lock lock(db_guard.restart_replica_mutex, std::defer_lock); + if (lock.try_lock()) + return lock; + return {}; } bool DatabaseCatalog::isDictionaryExist(const StorageID & table_id) const diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 805d7786569..edf1036b438 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -6,7 +6,10 @@ #include #include #include +#include "Common/NamePrompter.h" #include +#include "Storages/IStorage.h" +#include "Databases/IDatabase.h" #include #include @@ -27,6 +30,29 @@ namespace fs = std::filesystem; namespace DB { +class TableNameHints : public IHints<> +{ +public: + TableNameHints(ConstDatabasePtr database_, ContextPtr 
context_) + : context(context_), + database(database_) + { + } + Names getAllRegisteredNames() const override + { + Names result; + if (database) + { + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + result.emplace_back(table_it->name()); + } + return result; + } +private: + ContextPtr context; + ConstDatabasePtr database; +}; + class IDatabase; class Exception; class ColumnsDescription; @@ -148,6 +174,11 @@ public: /// Get an object that protects the database from concurrent DDL queries all tables in the database std::unique_lock getExclusiveDDLGuardForDatabase(const String & database); + /// We need special synchronization between DROP/DETACH DATABASE and SYSTEM RESTART REPLICA + /// because IStorage::flushAndPrepareForShutdown cannot be protected by DDLGuard (and a race with IStorage::startup is possible) + std::unique_lock getLockForDropDatabase(const String & database); + std::optional> tryGetLockForRestartReplica(const String & database); + void assertDatabaseExists(const String & database_name) const; void assertDatabaseDoesntExist(const String & database_name) const; @@ -262,7 +293,6 @@ private: static std::unique_ptr database_catalog; explicit DatabaseCatalog(ContextMutablePtr global_context_); - void assertDatabaseExistsUnlocked(const String & database_name) const TSA_REQUIRES(databases_mutex); void assertDatabaseDoesntExistUnlocked(const String & database_name) const TSA_REQUIRES(databases_mutex); void shutdownImpl(); @@ -280,7 +310,7 @@ private: static inline size_t getFirstLevelIdx(const UUID & uuid) { - return uuid.toUnderType().items[0] >> (64 - bits_for_first_level); + return UUIDHelpers::getHighBytes(uuid) >> (64 - bits_for_first_level); } void dropTableDataTask(); @@ -316,7 +346,15 @@ private: /// For the duration of the operation, an element is placed here, and an object is returned, /// which deletes the element in the destructor when counter becomes zero. /// In case the element already exists, waits when query will be executed in other thread. See class DDLGuard below. - using DatabaseGuard = std::pair; + struct DatabaseGuard + { + SharedMutex database_ddl_mutex; + SharedMutex restart_replica_mutex; + + DDLGuard::Map table_guards; + }; + DatabaseGuard & getDatabaseGuard(const String & database); + using DDLGuards = std::map; DDLGuards ddl_guards TSA_GUARDED_BY(ddl_guards_mutex); /// If you capture mutex and ddl_guards_mutex, then you need to grab them strictly in this order. diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index e55ac278705..5f664314818 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -30,6 +30,7 @@ public: std::shared_ptr storage_, const Block & right_sample_block_with_storage_column_names_); + std::string getName() const override { return "DirectKeyValueJoin"; } virtual const TableJoin & getTableJoin() const override { return *table_join; } virtual bool addBlockToJoin(const Block &, bool) override; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 3d821b60e81..2d1f3ba708a 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -77,6 +77,10 @@ static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqu subquery_settings.max_result_rows = 1; subquery_settings.extremes = false; subquery_context->setSettings(subquery_settings); + + /// When execute `INSERT INTO t WITH ... 
SELECT ...`, it may lead to `Unknown columns` + /// exception with this settings enabled(https://github.com/ClickHouse/ClickHouse/issues/52494). + subquery_context->getQueryContext()->setSetting("use_structure_from_insertion_table_in_table_functions", false); if (!data.only_analyze && subquery_context->hasQueryContext()) { /// Save current cached scalars in the context before analyzing the query @@ -98,7 +102,7 @@ static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqu void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { auto hash = subquery.getTreeHash(); - auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); + const auto scalar_query_hash_str = toString(hash); std::unique_ptr interpreter = nullptr; bool hit = false; @@ -180,7 +184,9 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(data.getContext()->getProgressCallback()); - while (block.rows() == 0 && executor.pull(block)); + while (block.rows() == 0 && executor.pull(block)) + { + } if (block.rows() == 0) { @@ -212,7 +218,8 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr Block tmp_block; while (tmp_block.rows() == 0 && executor.pull(tmp_block)) - ; + { + } if (tmp_block.rows() != 0) throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9aee61eb8f0..4cb1ee2a9a9 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1130,9 +1130,17 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( if (auto storage = analyzed_join->getStorageJoin()) { + auto joined_block_actions = analyzed_join->createJoinedBlockActions(getContext()); + NamesWithAliases required_columns_with_aliases = analyzed_join->getRequiredColumns( + Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + + Names original_right_column_names; + for (auto & pr : required_columns_with_aliases) + original_right_column_names.push_back(pr.first); + auto right_columns = storage->getRightSampleBlock().getColumnsWithTypeAndName(); std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns); - return storage->getJoinLocked(analyzed_join, getContext()); + return storage->getJoinLocked(analyzed_join, getContext(), original_right_column_names); } joined_plan = buildJoinedPlan(getContext(), join_element, *analyzed_join, query_options); @@ -1505,14 +1513,16 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & child : select_query->select()->children) select.insert(child->getAliasOrColumnName()); + NameSet required_by_interpolate; /// collect columns required for interpolate expressions - /// interpolate expression can use any available column - auto find_columns = [&step, &select](IAST * function) + auto find_columns = [&step, &select, &required_by_interpolate](IAST * function) { - auto f_impl = [&step, &select](IAST * fn, auto fi) + auto f_impl = [&step, &select, &required_by_interpolate](IAST * fn, auto fi) { if (auto * ident = fn->as()) { + required_by_interpolate.insert(ident->getColumnName()); /// exclude columns from select expression - they are already available if 
(!select.contains(ident->getColumnName())) step.addRequiredOutput(ident->getColumnName()); @@ -1528,6 +1538,14 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & interpolate : interpolate_list->children) find_columns(interpolate->as()->expr.get()); + + if (!required_result_columns.empty()) + { + NameSet required_result_columns_set(required_result_columns.begin(), required_result_columns.end()); + for (const auto & name : required_by_interpolate) + if (!required_result_columns_set.contains(name)) + required_result_columns.push_back(name); + } } if (optimize_read_in_order) diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 17f0fda71ec..b660db064d1 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,6 +40,8 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, + {"key", std::make_shared()}, + {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -60,6 +62,8 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); + columns[i++]->insert(file_segment_key); + columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 1b22d561c51..41a7c8e0fe6 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,16 +11,7 @@ namespace DB { -/// -/// -------- Column --------- Type ------ -/// | event_date | DateTime | -/// | event_time | UInt64 | -/// | query_id | String | -/// | remote_file_path | String | -/// | segment_range | Tuple | -/// | read_type | String | -/// ------------------------------------- -/// + struct FilesystemCacheLogElement { enum class CacheType @@ -39,10 +30,12 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; - size_t file_segment_size; + std::string file_segment_key{}; + size_t file_segment_offset = 0; + size_t file_segment_size = 0; bool read_from_cache_attempted; - String read_buffer_id; - std::shared_ptr profile_counters; + String read_buffer_id{}; + std::shared_ptr profile_counters = nullptr; static std::string name() { return "FilesystemCacheLog"; } diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index a6b53a51c04..3fc9f8920ed 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -28,6 +28,7 @@ public: LOG_TRACE(&Poco::Logger::get("FullSortingMergeJoin"), "Will use full sorting merge join"); } + std::string getName() const override { return "FullSortingMergeJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index 7bf6591af69..863c4f81a1b 
100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -23,11 +23,9 @@ public: { const char * assert_no_aggregates = nullptr; const char * assert_no_windows = nullptr; - // Explicit empty initializers are needed to make designated initializers - // work on GCC 10. std::unordered_set uniq_names {}; - ASTs aggregates; - ASTs window_functions; + ASTs aggregates{}; + ASTs window_functions{}; }; static bool needChildVisit(const ASTPtr & node, const ASTPtr & child) diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5b633fee9b6..08378c3158b 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -115,7 +115,7 @@ public: if (alias.empty()) { auto hash = subquery_or_table_name->getTreeHash(); - external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second); + external_table_name = fmt::format("_data_{}", toString(hash)); } else external_table_name = alias; diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 5d72cf20740..89ea3a326cc 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -1,15 +1,11 @@ #include #include #include -#include #include #include #include -#include -#include -#include #include #include @@ -18,6 +14,9 @@ #include +#include + + namespace CurrentMetrics { extern const Metric TemporaryFilesForJoin; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index ce519892b0e..44949440467 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -60,6 +60,7 @@ public: ~GraceHashJoin() override; + std::string getName() const override { return "GraceHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } void initialize(const Block & sample_block) override; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index be08b7cbe1e..db1789bf05f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -11,9 +11,12 @@ #include #include #include +#include + #include #include +#include #include #include @@ -28,6 +31,9 @@ #include #include +#include + + namespace DB { diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 56dea98c1f1..9f55945816c 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -151,6 +151,7 @@ public: ~HashJoin() override; + std::string getName() const override { return "HashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } /** Add block of data from right hand of JOIN to the map. diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 97b119bd795..493a5dd2126 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -48,6 +48,8 @@ class IJoin public: virtual ~IJoin() = default; + virtual std::string getName() const = 0; + virtual const TableJoin & getTableJoin() const = 0; /// Add block of data from right hand of JOIN. 
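The hunks above add a `getName()` override to each join implementation (`DirectKeyValueJoin`, `FullSortingMergeJoin`, `GraceHashJoin`, `HashJoin`) and a matching pure virtual `getName()` to the `IJoin` interface. As a rough standalone sketch of that pattern (the class and function names below are invented for illustration and are not ClickHouse code), exposing a name accessor on the base interface lets callers report which concrete join algorithm was chosen without resorting to RTTI:

```
#include <iostream>
#include <memory>
#include <string>

// Minimal stand-in for an interface like IJoin gaining a pure virtual name accessor.
struct IJoinLike
{
    virtual ~IJoinLike() = default;
    virtual std::string getName() const = 0;
};

struct HashJoinLike : IJoinLike
{
    std::string getName() const override { return "HashJoin"; }
};

struct GraceHashJoinLike : IJoinLike
{
    std::string getName() const override { return "GraceHashJoin"; }
};

int main()
{
    std::unique_ptr<IJoinLike> join = std::make_unique<GraceHashJoinLike>();
    // A caller (for example, pipeline construction or EXPLAIN-style output) can now
    // log the selected algorithm by name through the base pointer alone.
    std::cout << "Selected join algorithm: " << join->getName() << '\n';
    return 0;
}
```

The benefit of putting the accessor on the interface rather than on each caller is that any code holding a `JoinPtr` can identify the algorithm uniformly, without `dynamic_cast` chains per concrete type.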
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index e82415f1aca..a7445f89245 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -132,6 +132,21 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", table->getStorageID().getNameForLogs()); + if (mut_command->type == MutationCommand::UPDATE || mut_command->type == MutationCommand::DELETE) + { + /// TODO: add a check for result query size. + auto rewritten_command_ast = replaceNonDeterministicToScalars(*command_ast, getContext()); + if (rewritten_command_ast) + { + auto * new_alter_command = rewritten_command_ast->as(); + mut_command = MutationCommand::parse(new_alter_command); + if (!mut_command) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Alter command '{}' is rewritten to invalid command '{}'", + queryToString(*command_ast), queryToString(*rewritten_command_ast)); + } + } + mutation_commands.emplace_back(std::move(*mut_command)); } else @@ -141,10 +156,10 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) if (typeid_cast(database.get())) { int command_types_count = !mutation_commands.empty() + !partition_commands.empty() + !alter_commands.empty(); - bool mixed_settings_amd_metadata_alter = alter_commands.hasSettingsAlterCommand() && !alter_commands.isSettingsAlter(); + bool mixed_settings_amd_metadata_alter = alter_commands.hasNonReplicatedAlterCommand() && !alter_commands.areNonReplicatedAlterCommands(); if (1 < command_types_count || mixed_settings_amd_metadata_alter) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "For Replicated databases it's not allowed " - "to execute ALTERs of different types in single query"); + "to execute ALTERs of different types (replicated and non replicated) in single query"); } if (mutation_commands.hasNonEmptyMutationCommands()) diff --git a/src/Interpreters/InterpreterBackupQuery.cpp b/src/Interpreters/InterpreterBackupQuery.cpp index e238286a33c..be5fcedce27 100644 --- a/src/Interpreters/InterpreterBackupQuery.cpp +++ b/src/Interpreters/InterpreterBackupQuery.cpp @@ -17,7 +17,7 @@ namespace DB namespace { - Block getResultRow(const BackupsWorker::Info & info) + Block getResultRow(const BackupOperationInfo & info) { auto column_id = ColumnString::create(); auto column_status = ColumnInt8::create(); diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index d56b5029e41..3e87f4fe440 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -18,7 +19,8 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { - ASTCreateFunctionQuery & create_function_query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); AccessRightsElements access_rights_elements; access_rights_elements.emplace_back(AccessType::CREATE_FUNCTION); @@ -35,7 +37,7 @@ BlockIO InterpreterCreateFunctionQuery::execute() DDLQueryOnClusterParams params; params.access_to_check = std::move(access_rights_elements); - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query_ptr, 
current_context, params); } current_context->checkAccess(access_rights_elements); @@ -44,7 +46,7 @@ BlockIO InterpreterCreateFunctionQuery::execute() bool throw_if_exists = !create_function_query.if_not_exists && !create_function_query.or_replace; bool replace_if_exists = create_function_query.or_replace; - UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, query_ptr, throw_if_exists, replace_if_exists); + UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, updated_query_ptr, throw_if_exists, replace_if_exists); return {}; } diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 752bc6200ce..3b47a002e50 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; extern const int INCORRECT_QUERY; + extern const int NOT_IMPLEMENTED; } @@ -24,6 +25,15 @@ BlockIO InterpreterCreateIndexQuery::execute() auto current_context = getContext(); const auto & create_index = query_ptr->as(); + if (create_index.unique) + { + if (!current_context->getSettingsRef().create_index_ignore_unique) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CREATE UNIQUE INDEX is not supported." + " SET create_index_ignore_unique=1 to ignore this UNIQUE keyword."); + } + + } // Noop if allow_create_index_without_type = true. throw otherwise if (!create_index.index_decl->as()->type) { diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index bac59998062..3b0fba5fd9f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -1,5 +1,4 @@ #include - #include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 745dda34828..1cd34c2a0f6 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ #include #include -#include #include #include @@ -219,10 +219,12 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) else { bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - if (create.uuid != UUIDHelpers::Nil && !is_on_cluster) + if (create.uuid != UUIDHelpers::Nil && !is_on_cluster && !internal) throw Exception(ErrorCodes::INCORRECT_QUERY, "Ordinary database engine does not support UUID"); - /// Ignore UUID if it's ON CLUSTER query + /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either + /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or + /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. create.uuid = UUIDHelpers::Nil; metadata_path = metadata_path / "metadata" / database_name_escaped; } @@ -704,6 +706,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. 
Turn on allow_experimental_annoy_index"); + if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index) + throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index"); + properties.indices.push_back(index_desc); } if (create.columns_list->projections) @@ -764,7 +769,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti /// Table function without columns list. auto table_function_ast = create.as_table_function->ptr(); auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext()); - properties.columns = table_function->getActualTableStructure(getContext()); + properties.columns = table_function->getActualTableStructure(getContext(), /*is_insert_query*/ true); } else if (create.is_dictionary) { @@ -980,19 +985,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } -static void generateUUIDForTable(ASTCreateQuery & create) -{ - if (create.uuid == UUIDHelpers::Nil) - create.uuid = UUIDHelpers::generateV4(); - - /// If destination table (to_table_id) is not specified for materialized view, - /// then MV will create inner table. We should generate UUID of inner table here, - /// so it will be the same on all hosts if query in ON CLUSTER or database engine is Replicated. - bool need_uuid_for_inner_table = !create.attach && create.is_materialized_view && !create.to_table_id; - if (need_uuid_for_inner_table && create.to_inner_uuid == UUIDHelpers::Nil) - create.to_inner_uuid = UUIDHelpers::generateV4(); -} - void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const { const auto * kind = create.is_dictionary ? "Dictionary" : "Table"; @@ -1025,17 +1017,26 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data kind_upper, create.table); } - generateUUIDForTable(create); + create.generateRandomUUID(); } else { bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil; - if (has_uuid && !is_on_cluster) + if (has_uuid && !is_on_cluster && !internal) + { + /// We don't show the following error message either + /// 1) if it's a secondary query (an initiator of a CREATE TABLE ON CLUSTER query + /// doesn't know the exact database engines on replicas and generates an UUID, and then the replicas are free to ignore that UUID); or + /// 2) if it's an internal query (for example RESTORE uses internal queries to create tables and it generates an UUID + /// before creating a table to be possibly ignored if the database engine doesn't need it). throw Exception(ErrorCodes::INCORRECT_QUERY, "{} UUID specified, but engine of database {} is not Atomic", kind, create.getDatabase()); + } - /// Ignore UUID if it's ON CLUSTER query + /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either + /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or + /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. 
create.uuid = UUIDHelpers::Nil; create.to_inner_uuid = UUIDHelpers::Nil; } @@ -1329,10 +1330,32 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } data_path = database->getTableDataPath(create); + auto full_data_path = fs::path{getContext()->getPath()} / data_path; - if (!create.attach && !data_path.empty() && fs::exists(fs::path{getContext()->getPath()} / data_path)) - throw Exception(storage_already_exists_error_code, - "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path)); + if (!create.attach && !data_path.empty() && fs::exists(full_data_path)) + { + if (getContext()->getZooKeeperMetadataTransaction() && + !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery() && + !DatabaseCatalog::instance().hasUUIDMapping(create.uuid) && + Context::getGlobalContextInstance()->isServerCompletelyStarted() && + Context::getGlobalContextInstance()->getConfigRef().getBool("allow_moving_table_directory_to_trash", false)) + { + /// This is a secondary query from a Replicated database. It cannot be retried with another UUID, we must execute it as is. + /// We don't have a table with this UUID (and all metadata is loaded), + /// so the existing directory probably contains some leftovers from previous unsuccessful attempts to create the table + + fs::path trash_path = fs::path{getContext()->getPath()} / "trash" / data_path / getHexUIntLowercase(thread_local_rng()); + LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "Directory for {} data {} already exists. Will move it to {}", + Poco::toLower(storage_name), String(data_path), trash_path); + fs::create_directories(trash_path.parent_path()); + renameNoReplace(full_data_path, trash_path); + } + else + { + throw Exception(storage_already_exists_error_code, + "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path)); + } + } bool from_path = create.attach_from_path.has_value(); String actual_data_path = data_path; @@ -1594,7 +1617,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont /// For CREATE query generate UUID on initiator, so it will be the same on all hosts. /// It will be ignored if database does not support UUIDs. - generateUUIDForTable(create); + create.generateRandomUUID(); /// For cross-replication cluster we cannot use UUID in replica path. 
String cluster_name_expanded = local_context->getMacros()->expand(cluster_name); diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index 7822ecdb8be..875c133bb76 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -25,7 +25,6 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "current_size"}, ColumnWithTypeAndName{std::make_shared(), "current_elements"}, ColumnWithTypeAndName{std::make_shared(), "path"}, - ColumnWithTypeAndName{std::make_shared>(), "delayed_cleanup_interval_ms"}, ColumnWithTypeAndName{std::make_shared>(), "background_download_threads"}, ColumnWithTypeAndName{std::make_shared>(), "enable_bypass_cache_with_threshold"}, }; @@ -54,9 +53,8 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(cache->getUsedCacheSize()); res_columns[i++]->insert(cache->getFileSegmentsNum()); res_columns[i++]->insert(cache->getBasePath()); - res_columns[i++]->insert(settings.delayed_cleanup_interval_ms); res_columns[i++]->insert(settings.background_download_threads); - res_columns[i++]->insert(settings.enable_bypass_cache_with_threashold); + res_columns[i++]->insert(settings.enable_bypass_cache_with_threshold); BlockIO res; size_t num_rows = res_columns[0]->size(); diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 0b9eea86b46..26d9e5254f3 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -96,7 +96,7 @@ BlockIO InterpreterDescribeQuery::execute() else if (table_expression.table_function) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, getContext()); - auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext()); + auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true); for (const auto & table_function_column_description : table_function_column_descriptions) columns.emplace_back(table_function_column_description); } diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index df81ae661c7..af60d9c5df7 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -1,12 +1,13 @@ -#include +#include #include #include #include #include #include -#include #include +#include +#include namespace DB @@ -20,7 +21,9 @@ namespace ErrorCodes BlockIO InterpreterDropFunctionQuery::execute() { FunctionNameNormalizer().visit(query_ptr.get()); - ASTDropFunctionQuery & drop_function_query = query_ptr->as(); + + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + ASTDropFunctionQuery & drop_function_query = updated_query_ptr->as(); AccessRightsElements access_rights_elements; access_rights_elements.emplace_back(AccessType::DROP_FUNCTION); @@ -34,7 +37,7 @@ BlockIO InterpreterDropFunctionQuery::execute() DDLQueryOnClusterParams params; params.access_to_check = std::move(access_rights_elements); - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query_ptr, current_context, params); } current_context->checkAccess(access_rights_elements); diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp 
b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cc3444bb4df..fe49b1cfd7c 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -22,11 +22,7 @@ BlockIO InterpreterDropNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - if (query.if_exists) - NamedCollectionUtils::removeIfExistsFromSQL(query.collection_name, current_context); - else - NamedCollectionUtils::removeFromSQL(query.collection_name, current_context); - + NamedCollectionUtils::removeFromSQL(query, current_context); return {}; } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 616cf80a446..9dd28cbeaac 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -208,7 +208,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); - table->checkTableCanBeDropped(); + table->checkTableCanBeDropped(context_); TableExclusiveLockHolder table_excl_lock; /// We don't need any lock for ReplicatedMergeTree and for simple MergeTree @@ -228,10 +228,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue { /// If DROP DICTIONARY query is not used, check if Dictionary can be dropped with DROP TABLE query if (!query.is_dictionary) - table->checkTableCanBeDropped(); + table->checkTableCanBeDropped(context_); } else - table->checkTableCanBeDropped(); + table->checkTableCanBeDropped(context_); /// Check dependencies before shutting table down bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; @@ -247,10 +247,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue DatabaseCatalog::instance().removeDependencies(table_id, check_ref_deps, check_loading_deps, is_drop_or_detach_database); database->dropTable(context_, table_id.table_name, query.sync); - /// We have to drop mmapio cache when dropping table from Ordinary database + /// We have to clear mmapio cache when dropping table from Ordinary database /// to avoid reading old data if new table with the same name is created if (database->getUUID() == UUIDHelpers::Nil) - context_->dropMMappedFileCache(); + context_->clearMMappedFileCache(); } db = database; @@ -329,13 +329,12 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, database = tryGetDatabase(database_name, query.if_exists); if (database) { - if (query.kind == ASTDropQuery::Kind::Truncate) - { - throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to truncate database"); - } - else if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop) + if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop + || query.kind == ASTDropQuery::Kind::Truncate) { bool drop = query.kind == ASTDropQuery::Kind::Drop; + bool truncate = query.kind == ASTDropQuery::Kind::Truncate; + getContext()->checkAccess(AccessType::DROP_DATABASE, database_name); if (query.kind == ASTDropQuery::Kind::Detach && query.permanently) @@ -346,8 +345,15 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, if (database->shouldBeEmptyOnDetach()) { + /// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish. 
+ /// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below. + auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name); + ASTDropQuery query_for_table; query_for_table.kind = query.kind; + // For truncate operation on database, drop the tables + if (truncate) + query_for_table.kind = ASTDropQuery::Kind::Drop; query_for_table.if_exists = true; query_for_table.setDatabase(database_name); query_for_table.sync = query.sync; @@ -361,8 +367,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, std::vector> tables_to_drop; for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next()) { - iterator->table()->flush(); - tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()}); + auto table_ptr = iterator->table(); + table_ptr->flushAndPrepareForShutdown(); + tables_to_drop.push_back({iterator->name(), table_ptr->isDictionary()}); } for (const auto & table : tables_to_drop) @@ -375,8 +382,8 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, uuids_to_wait.push_back(table_to_wait); } } - - if (!drop && query.sync) + // only if operation is DETACH + if ((!drop || !truncate) && query.sync) { /// Avoid "some tables are still in use" when sync mode is enabled for (const auto & table_uuid : uuids_to_wait) @@ -385,12 +392,13 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, /// Protects from concurrent CREATE TABLE queries auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name); - - if (!drop) + // only if operation is DETACH + if (!drop || !truncate) database->assertCanBeDetached(true); - /// DETACH or DROP database itself - DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); + /// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database. + if (!truncate) + DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); } } @@ -432,8 +440,9 @@ AccessRightsElements InterpreterDropQuery::getRequiredAccessForDDLOnCluster() co } void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, - const StorageID & target_table_id, bool sync, bool ignore_sync_setting) + const StorageID & target_table_id, bool sync, bool ignore_sync_setting, bool need_ddl_guard) { + auto ddl_guard = (need_ddl_guard ? DatabaseCatalog::instance().getDDLGuard(target_table_id.database_name, target_table_id.table_name) : nullptr); if (DatabaseCatalog::instance().tryGetTable(target_table_id, current_context)) { /// We create and execute `drop` query for internal table. 
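Editor's note: the InterpreterDropQuery.cpp changes above extend the DETACH/DROP DATABASE path to also handle TRUNCATE DATABASE. Every contained table is dropped (the per-table query kind is rewritten from Truncate to Drop), while the final detachDatabase() call is skipped so the database itself survives. A minimal sketch of that kind mapping, using only names that appear in the hunk above; the helper function itself is illustrative and not part of the patch:

    /// Maps the database-level query kind to the kind used for each contained table.
    ASTDropQuery::Kind kindForContainedTables(ASTDropQuery::Kind database_kind)
    {
        /// TRUNCATE DATABASE drops the tables it contains...
        if (database_kind == ASTDropQuery::Kind::Truncate)
            return ASTDropQuery::Kind::Drop;
        /// ...while DETACH DATABASE and DROP DATABASE propagate their own kind.
        return database_kind;
    }

The later `if (!truncate) DatabaseCatalog::instance().detachDatabase(...)` guard in the same hunk is what keeps the emptied database registered after a TRUNCATE.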
diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index af7a4ddef25..7ae544a7356 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -25,7 +25,7 @@ public: BlockIO execute() override; static void executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, - const StorageID & target_table_id, bool sync, bool ignore_sync_setting = false); + const StorageID & target_table_id, bool sync, bool ignore_sync_setting = false, bool need_ddl_guard = false); bool supportsTransactions() const override; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 3a381cd8dab..39cc4df5c2d 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -541,13 +541,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions()); interpreter.buildQueryPlan(plan); context = interpreter.getContext(); - // collect the selected marks, rows, parts during build query pipeline. - plan.buildQueryPipeline( + // Collect the selected marks, rows, parts during build query pipeline. + // Hold on to the returned QueryPipelineBuilderPtr because `plan` may have pointers into + // it (through QueryPlanResourceHolder). + auto builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - if (settings.optimize) - plan.optimize(QueryPlanOptimizationSettings::fromContext(context)); plan.explainEstimate(res_columns); insert_buf = false; break; diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 9cd1f2a251c..de3a3d68d39 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +81,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -201,6 +205,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); @@ -314,6 +322,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 078499fb013..1f6f2336ab8 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -616,6 +616,7 @@ BlockIO InterpreterInsertQuery::execute() presink_chains.at(0).appendChain(std::move(sink_chains.at(0))); res.pipeline = QueryPipeline(std::move(presink_chains[0])); res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); if (query.hasInlinedData() && !async_insert) { diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp 
b/src/Interpreters/InterpreterKillQueryQuery.cpp index 3330159aff5..590b7fe37b8 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include @@ -296,7 +295,7 @@ BlockIO InterpreterKillQueryQuery::execute() if (res_columns[0]->empty() && access_denied) throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill mutation. " - "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); + "To execute this query, it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -360,7 +359,7 @@ BlockIO InterpreterKillQueryQuery::execute() if (res_columns[0]->empty() && access_denied) throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill move partition. " - "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); + "To execute this query, it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 75d43b541e1..ae79b3f932e 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable()); if (rename.exchange) { - required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable()); + required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable()); required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable()); } } diff --git a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h index 25c707d9962..79a6c0ddef4 100644 --- a/src/Interpreters/InterpreterRenameQuery.h +++ b/src/Interpreters/InterpreterRenameQuery.h @@ -10,7 +10,7 @@ namespace DB class AccessRightsElements; class DDLGuard; -/// To avoid deadlocks, we must acquire locks for tables in same order in any different RENAMES. +/// To avoid deadlocks, we must acquire locks for tables in same order in any different RENAMEs. struct UniqueTableName { String database_name; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d07a6521544..cc0f2bf7283 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,6 @@ #include #include #include -#include #include #include @@ -84,12 +84,9 @@ #include #include #include -#include #include #include #include -#include -#include #include #include #include @@ -97,7 +94,6 @@ #include #include -#include "config_version.h" namespace ProfileEvents { @@ -299,7 +295,7 @@ void checkAccessRightsForSelect( } throw Exception( ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + "{}: Not enough privileges. 
To execute this query, it's necessary to have the grant SELECT for at least one column on {}", context->getUserName(), table_id.getFullTableName()); } @@ -426,6 +422,12 @@ InterpreterSelectQuery::InterpreterSelectQuery( RewriteCountDistinctFunctionVisitor(data_rewrite_countdistinct).visit(query_ptr); } + if (settings.optimize_uniq_to_count) + { + RewriteUniqToCountMatcher::Data data_rewrite_uniq_count; + RewriteUniqToCountVisitor(data_rewrite_uniq_count).visit(query_ptr); + } + JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols, options_.is_create_parameterized_view); bool got_storage_from_query = false; @@ -468,12 +470,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( } } - /// Set skip_unavailable_shards to true only if it wasn't disabled explicitly - if (settings.allow_experimental_parallel_reading_from_replicas > 0 && !settings.skip_unavailable_shards && !settings.isChanged("skip_unavailable_shards")) - { - context->setSetting("skip_unavailable_shards", true); - } - /// Check support for JOIN for parallel replicas with custom key if (joined_tables.tablesCount() > 1 && !settings.parallel_replicas_custom_key.value.empty()) { @@ -610,27 +606,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; - NameToNameMap parameter_types; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - /// We need to fetch the parameters set for SELECT ... FROM parameterized_view() before the query is replaced. - /// replaceWithSubquery replaces the function child and adds the subquery in its place. - /// the parameters are children of function child, if function (which corresponds to parametrised view and has - /// parameters in its arguments: `parametrised_view()`) is replaced the parameters are also gone from tree - /// So we need to get the parameters before they are removed from the tree - /// and after query is replaced, we use these parameters to substitute in the parameterized view query - if (query_info.is_parameterized_view) - { - query_info.parameterized_view_values = analyzeFunctionParamValues(query_ptr); - parameter_types = view->getParameterTypes(); - } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (query_info.is_parameterized_view) - { - view->replaceQueryParametersIfParametrizedView(query_ptr, query_info.parameterized_view_values); - } - } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( @@ -639,10 +618,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( options, joined_tables.tablesWithColumns(), required_result_column_names, - table_join, - query_info.is_parameterized_view, - query_info.parameterized_view_values, - parameter_types); + table_join); query_info.syntax_analyzer_result = syntax_analyzer_result; @@ -793,7 +769,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns, query_info.parameterized_view_values); + source_header = storage_snapshot->getSampleBlockForColumns(required_columns); } /// Calculate structure of the result. 
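Editor's note: one functional addition in the InterpreterSelectQuery constructor above is the optimize_uniq_to_count hook. When the setting is enabled, RewriteUniqToCountVisitor replaces a uniq()-style aggregate over a subquery that already deduplicates (DISTINCT or GROUP BY on the same key) with a plain count(). A hedged illustration of the intent; the guard mirrors the hunk above, and the SQL in the comment is an assumed example rather than text from the patch:

    /// Conceptually the rewrite turns
    ///   SELECT uniq(x) FROM (SELECT DISTINCT x FROM t)
    /// into
    ///   SELECT count() FROM (SELECT DISTINCT x FROM t)
    /// because the inner query already returns each x at most once.
    if (settings.optimize_uniq_to_count)
    {
        RewriteUniqToCountMatcher::Data data_rewrite_uniq_count;
        RewriteUniqToCountVisitor(data_rewrite_uniq_count).visit(query_ptr);
    }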
@@ -1213,12 +1189,12 @@ static InterpolateDescriptionPtr getInterpolateDescription( } col_set.clear(); - for (const auto & column : source_block) + for (const auto & column : result_block) { source_columns.emplace_back(column.name, column.type); col_set.insert(column.name); } - for (const auto & column : result_block) + for (const auto & column : source_block) if (!col_set.contains(column.name)) source_columns.emplace_back(column.name, column.type); } @@ -2274,8 +2250,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle && !settings.allow_experimental_query_deduplication && !settings.empty_result_for_aggregation_by_empty_set && storage - && storage->getName() != "MaterializedMySQL" - && !storage->hasLightweightDeletedMask() + && storage->supportsTrivialCountOptimization() && query_info.filter_asts.empty() && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) @@ -2287,6 +2262,10 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle auto & query = getSelectQuery(); if (!query.prewhere() && !query.where() && !context->getCurrentTransaction()) { + /// Some storages can optimize trivial count in read() method instead of totalRows() because it still can + /// require reading some data (but much faster than reading columns). + /// Set a special flag in query info so the storage will see it and optimize count in read() method. + query_info.optimize_trivial_count = optimize_trivial_count; return storage->totalRows(settings); } else @@ -2544,6 +2523,8 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (!query_plan.getMaxThreads() || is_remote) query_plan.setMaxThreads(max_threads_execute_query); + query_plan.setConcurrencyControl(settings.use_concurrency_control); + /// Aliases in table declaration. 
if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) { diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 8db1d27c073..b8cace5e0ad 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -184,7 +184,7 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , query_tree(query_tree_) - , planner(query_tree_, select_query_options_) + , planner(query_tree_, select_query_options) { } diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index e9118b747e5..2c0baa0d4b3 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -15,7 +15,7 @@ namespace DB BlockIO InterpreterSetQuery::execute() { const auto & ast = query_ptr->as(); - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); auto session_context = getContext()->getSessionContext(); session_context->applySettingsChanges(ast.changes); session_context->addQueryParameters(ast.query_parameters); @@ -28,7 +28,7 @@ void InterpreterSetQuery::executeForCurrentContext(bool ignore_setting_constrain { const auto & ast = query_ptr->as(); if (!ignore_setting_constraints) - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); getContext()->applySettingsChanges(ast.changes); getContext()->resetSettingsToDefaultValue(ast.default_settings); } diff --git a/src/Interpreters/InterpreterShowFunctionsQuery.cpp b/src/Interpreters/InterpreterShowFunctionsQuery.cpp new file mode 100644 index 00000000000..efadb929451 --- /dev/null +++ b/src/Interpreters/InterpreterShowFunctionsQuery.cpp @@ -0,0 +1,46 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +InterpreterShowFunctionsQuery::InterpreterShowFunctionsQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) +{ +} + +BlockIO InterpreterShowFunctionsQuery::execute() +{ + return executeQuery(getRewrittenQuery(), getContext(), true); +} + +String InterpreterShowFunctionsQuery::getRewrittenQuery() +{ + constexpr const char * functions_table = "functions"; + + const auto & query = query_ptr->as(); + + DatabasePtr systemDb = DatabaseCatalog::instance().getSystemDatabase(); + + String rewritten_query = fmt::format( + R"( +SELECT * +FROM {}.{})", + systemDb->getDatabaseName(), + functions_table); + + if (!query.like.empty()) + { + rewritten_query += " WHERE name "; + rewritten_query += query.case_insensitive_like ? 
"ILIKE " : "LIKE "; + rewritten_query += fmt::format("'{}'", query.like); + } + + return rewritten_query; +} + +} diff --git a/src/Interpreters/InterpreterShowFunctionsQuery.h b/src/Interpreters/InterpreterShowFunctionsQuery.h new file mode 100644 index 00000000000..f15e1ae67b2 --- /dev/null +++ b/src/Interpreters/InterpreterShowFunctionsQuery.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +class InterpreterShowFunctionsQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterShowFunctionsQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_); + + BlockIO execute() override; + + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + ASTPtr query_ptr; + + String getRewrittenQuery(); +}; + +} diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 2f1a4a32bee..5fe0a862e05 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -116,10 +116,10 @@ String InterpreterShowTablesQuery::getRewrittenQuery() return rewritten_query.str(); } - if (query.temporary && !query.from.empty()) + if (query.temporary && !query.getFrom().empty()) throw Exception(ErrorCodes::SYNTAX_ERROR, "The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`"); - String database = getContext()->resolveDatabase(query.from); + String database = getContext()->resolveDatabase(query.getFrom()); DatabaseCatalog::instance().assertDatabaseExists(database); WriteBufferFromOwnString rewritten_query; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 02cdeb0154e..12db14973bb 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -36,8 +36,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -51,7 +53,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -83,58 +87,54 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int TIMEOUT_EXCEEDED; extern const int TABLE_WAS_NOT_DROPPED; + extern const int ABORTED; } namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType ReplicationQueue; - extern StorageActionBlockType DistributedSend; - extern StorageActionBlockType PartsTTLMerge; - extern StorageActionBlockType PartsMove; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType ReplicationQueue; + extern const StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsTTLMerge; + extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } namespace { -ExecutionStatus getOverallExecutionStatusOfCommands() +/// Sequentially tries to execute all commands and throws exception with info about failed commands +void executeCommandsAndThrowIfError(std::vector> commands) { - return ExecutionStatus(0); -} + ExecutionStatus result(0); + for (auto & command : commands) + { + try + { + command(); + } + catch (...) 
+ { + ExecutionStatus current_result = ExecutionStatus::fromCurrentException(); -/// Consequently tries to execute all commands and generates final exception message for failed commands -template -ExecutionStatus getOverallExecutionStatusOfCommands(Callable && command, Callables && ... commands) -{ - ExecutionStatus status_head(0); - try - { - command(); - } - catch (...) - { - status_head = ExecutionStatus::fromCurrentException(); + if (result.code == 0) + result.code = current_result.code; + + if (!current_result.message.empty()) + { + if (!result.message.empty()) + result.message += '\n'; + result.message += current_result.message; + } + } } - ExecutionStatus status_tail = getOverallExecutionStatusOfCommands(std::forward(commands)...); - - auto res_status = status_head.code != 0 ? status_head.code : status_tail.code; - auto res_message = status_head.message + (status_tail.message.empty() ? "" : ("\n" + status_tail.message)); - - return ExecutionStatus(res_status, res_message); -} - -/// Consequently tries to execute all commands and throws exception with info about failed commands -template -void executeCommandsAndThrowIfError(Callables && ... commands) -{ - auto status = getOverallExecutionStatusOfCommands(std::forward(commands)...); - if (status.code != 0) - throw Exception::createDeprecated(status.message, status.code); + if (result.code != 0) + throw Exception::createDeprecated(result.message, result.code); } @@ -154,6 +154,8 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) return AccessType::SYSTEM_TTL_MERGES; else if (action_type == ActionLocks::PartsMove) return AccessType::SYSTEM_MOVES; + else if (action_type == ActionLocks::PullReplicationLog) + return AccessType::SYSTEM_PULLING_REPLICATION_LOG; else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } @@ -321,33 +323,33 @@ BlockIO InterpreterSystemQuery::execute() } case Type::DROP_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); - system_context->dropMarkCache(); + system_context->clearMarkCache(); break; case Type::DROP_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); - system_context->dropUncompressedCache(); + system_context->clearUncompressedCache(); break; case Type::DROP_INDEX_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); - system_context->dropIndexMarkCache(); + system_context->clearIndexMarkCache(); break; case Type::DROP_INDEX_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); - system_context->dropIndexUncompressedCache(); + system_context->clearIndexUncompressedCache(); break; case Type::DROP_MMAP_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE); - system_context->dropMMappedFileCache(); + system_context->clearMMappedFileCache(); break; case Type::DROP_QUERY_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); - getContext()->dropQueryCache(); + getContext()->clearQueryCache(); break; #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) - cache->reset(); + cache->clear(); break; #endif #if USE_AWS_S3 @@ -370,16 +372,75 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - cache->removeAllReleasable(); + if 
(query.key_to_drop.empty()) + { + cache->removeAllReleasable(); + } + else + { + auto key = FileCacheKey::fromKeyString(query.key_to_drop); + if (query.offset_to_drop.has_value()) + cache->removeFileSegment(key, query.offset_to_drop.value()); + else + cache->removeKey(key); + } } break; } + case Type::SYNC_FILESYSTEM_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_SYNC_FILESYSTEM_CACHE); + + ColumnsDescription columns{NamesAndTypesList{ + {"cache_name", std::make_shared()}, + {"path", std::make_shared()}, + {"size", std::make_shared()}, + }}; + Block sample_block; + for (const auto & column : columns) + sample_block.insert({column.type->createColumn(), column.type, column.name}); + + MutableColumns res_columns = sample_block.cloneEmptyColumns(); + + auto fill_data = [&](const std::string & cache_name, const FileCachePtr & cache, const FileSegments & file_segments) + { + for (const auto & file_segment : file_segments) + { + size_t i = 0; + const auto path = cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind()); + res_columns[i++]->insert(cache_name); + res_columns[i++]->insert(path); + res_columns[i++]->insert(file_segment->getDownloadedSize()); + } + }; + + if (query.filesystem_cache_name.empty()) + { + auto caches = FileCacheFactory::instance().getAll(); + for (const auto & [cache_name, cache_data] : caches) + { + auto file_segments = cache_data->cache->sync(); + fill_data(cache_name, cache_data->cache, file_segments); + } + } + else + { + auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; + auto file_segments = cache->sync(); + fill_data(query.filesystem_cache_name, cache, file_segments); + } + + size_t num_rows = res_columns[0]->size(); + auto source = std::make_shared(sample_block, Chunk(std::move(res_columns), num_rows)); + result.pipeline = QueryPipeline(std::move(source)); + break; + } case Type::DROP_SCHEMA_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_SCHEMA_CACHE); std::unordered_set caches_to_drop; if (query.schema_cache_storage.empty()) - caches_to_drop = {"FILE", "S3", "HDFS", "URL"}; + caches_to_drop = {"FILE", "S3", "HDFS", "URL", "AZURE"}; else caches_to_drop = {query.schema_cache_storage}; @@ -395,6 +456,10 @@ BlockIO InterpreterSystemQuery::execute() #endif if (caches_to_drop.contains("URL")) StorageURL::getSchemaCache(getContext()).clear(); +#if USE_AZURE_BLOB_STORAGE + if (caches_to_drop.contains("AZURE")) + StorageAzureBlob::getSchemaCache(getContext()).clear(); +#endif break; } case Type::RELOAD_DICTIONARY: @@ -410,10 +475,10 @@ BlockIO InterpreterSystemQuery::execute() case Type::RELOAD_DICTIONARIES: { getContext()->checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); - executeCommandsAndThrowIfError( + executeCommandsAndThrowIfError({ [&] { system_context->getExternalDictionariesLoader().reloadAllTriedToLoad(); }, [&] { system_context->getEmbeddedDictionaries().reload(); } - ); + }); ExternalDictionariesLoader::resetAll(); break; } @@ -501,6 +566,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::START_DISTRIBUTED_SENDS: startStopAction(ActionLocks::DistributedSend, true); break; + case Type::STOP_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, false); + break; + case Type::START_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, true); + break; case Type::DROP_REPLICA: dropReplica(query); break; @@ -536,28 +607,35 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: { 
getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); - executeCommandsAndThrowIfError( - [&] { if (auto query_log = getContext()->getQueryLog()) query_log->flush(true); }, - [&] { if (auto part_log = getContext()->getPartLog("")) part_log->flush(true); }, - [&] { if (auto query_thread_log = getContext()->getQueryThreadLog()) query_thread_log->flush(true); }, - [&] { if (auto trace_log = getContext()->getTraceLog()) trace_log->flush(true); }, - [&] { if (auto text_log = getContext()->getTextLog()) text_log->flush(true); }, - [&] { if (auto metric_log = getContext()->getMetricLog()) metric_log->flush(true); }, - [&] { if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); }, - [&] { if (auto opentelemetry_span_log = getContext()->getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); }, - [&] { if (auto query_views_log = getContext()->getQueryViewsLog()) query_views_log->flush(true); }, - [&] { if (auto zookeeper_log = getContext()->getZooKeeperLog()) zookeeper_log->flush(true); }, - [&] { if (auto session_log = getContext()->getSessionLog()) session_log->flush(true); }, - [&] { if (auto transactions_info_log = getContext()->getTransactionsInfoLog()) transactions_info_log->flush(true); }, - [&] { if (auto processors_profile_log = getContext()->getProcessorsProfileLog()) processors_profile_log->flush(true); }, - [&] { if (auto cache_log = getContext()->getFilesystemCacheLog()) cache_log->flush(true); }, - [&] { if (auto asynchronous_insert_log = getContext()->getAsynchronousInsertLog()) asynchronous_insert_log->flush(true); } - ); + + auto logs = getContext()->getSystemLogs(); + std::vector> commands; + commands.reserve(logs.size()); + for (auto * system_log : logs) + commands.emplace_back([system_log] { system_log->flush(true); }); + + executeCommandsAndThrowIfError(commands); + break; + } + case Type::STOP_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->stopServers(query.server_type); + break; + case Type::START_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->startServers(query.server_type); + break; + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + auto * queue = getContext()->getAsynchronousInsertQueue(); + if (!queue) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot flush asynchronous insert queue because it is not initialized"); + + queue->flushAll(); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); case Type::STOP_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::stop(); @@ -606,12 +684,15 @@ void InterpreterSystemQuery::restoreReplica() table_replicated_ptr->restoreMetadataInZooKeeper(); } -StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica, ContextMutablePtr system_context, bool need_ddl_guard) +StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica, ContextMutablePtr system_context) { LOG_TRACE(log, "Restarting replica {}", replica); - auto table_ddl_guard = need_ddl_guard - ? 
DatabaseCatalog::instance().getDDLGuard(replica.getDatabaseName(), replica.getTableName()) - : nullptr; + auto table_ddl_guard = DatabaseCatalog::instance().getDDLGuard(replica.getDatabaseName(), replica.getTableName()); + + auto restart_replica_lock = DatabaseCatalog::instance().tryGetLockForRestartReplica(replica.getDatabaseName()); + if (!restart_replica_lock) + throw Exception(ErrorCodes::ABORTED, "Database {} is being dropped or detached, will not restart replica {}", + backQuoteIfNeed(replica.getDatabaseName()), replica.getNameForLogs()); auto [database, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable(replica, getContext()); ASTPtr create_ast; @@ -690,21 +771,13 @@ void InterpreterSystemQuery::restartReplicas(ContextMutablePtr system_context) if (replica_names.empty()) return; - TableGuards guards; - - for (const auto & name : replica_names) - guards.emplace(UniqueTableName{name.database_name, name.table_name}, nullptr); - - for (auto & guard : guards) - guard.second = catalog.getDDLGuard(guard.first.database_name, guard.first.table_name); - size_t threads = std::min(static_cast(getNumberOfPhysicalCPUCores()), replica_names.size()); LOG_DEBUG(log, "Will restart {} replicas using {} threads", replica_names.size(), threads); ThreadPool pool(CurrentMetrics::RestartReplicaThreads, CurrentMetrics::RestartReplicaThreadsActive, threads); for (auto & replica : replica_names) { - pool.scheduleOrThrowOnError([&]() { tryRestartReplica(replica, system_context, false); }); + pool.scheduleOrThrowOnError([&]() { tryRestartReplica(replica, system_context); }); } pool.wait(); } @@ -998,6 +1071,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_INDEX_MARK_CACHE: case Type::DROP_INDEX_UNCOMPRESSED_CACHE: case Type::DROP_FILESYSTEM_CACHE: + case Type::SYNC_FILESYSTEM_CACHE: case Type::DROP_SCHEMA_CACHE: #if USE_AWS_S3 case Type::DROP_S3_CLIENT_CACHE: @@ -1062,6 +1136,15 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_MOVES, query.getDatabase(), query.getTable()); break; } + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: + { + if (!query.table) + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG); + else + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG, query.getDatabase(), query.getTable()); + break; + } case Type::STOP_FETCHES: case Type::START_FETCHES: { @@ -1149,6 +1232,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS); break; } + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + break; + } case Type::RESTART_DISK: { required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK); @@ -1164,8 +1252,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: + case Type::STOP_LISTEN: + case Type::START_LISTEN: + { + required_access.emplace_back(AccessType::SYSTEM_LISTEN); + break; + } case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 8a1cdaf8edd..826d4cc0c69 100644 --- 
a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -52,7 +52,7 @@ private: /// Tries to get a replicated table and restart it /// Returns pointer to a newly created table if the restart was successful - StoragePtr tryRestartReplica(const StorageID & replica, ContextMutablePtr context, bool need_ddl_guard = true); + StoragePtr tryRestartReplica(const StorageID & replica, ContextMutablePtr context); void restartReplica(const StorageID & replica, ContextMutablePtr system_context); void restartReplicas(ContextMutablePtr system_context); diff --git a/src/Interpreters/InterpreterUseQuery.cpp b/src/Interpreters/InterpreterUseQuery.cpp index d8a5ae57470..b71f3a9cc1c 100644 --- a/src/Interpreters/InterpreterUseQuery.cpp +++ b/src/Interpreters/InterpreterUseQuery.cpp @@ -10,7 +10,7 @@ namespace DB BlockIO InterpreterUseQuery::execute() { - const String & new_database = query_ptr->as().database; + const String & new_database = query_ptr->as().getDatabase(); getContext()->checkAccess(AccessType::SHOW_DATABASES, new_database); getContext()->getSessionContext()->setCurrentDatabase(new_database); return {}; diff --git a/src/Interpreters/JIT/CompileDAG.cpp b/src/Interpreters/JIT/CompileDAG.cpp index 6da17fb4c67..9c56341eae0 100644 --- a/src/Interpreters/JIT/CompileDAG.cpp +++ b/src/Interpreters/JIT/CompileDAG.cpp @@ -160,9 +160,7 @@ UInt128 CompileDAG::hash() const } } - UInt128 result; - hash.get128(result); - return result; + return hash.get128(); } } diff --git a/src/Interpreters/JIT/CompiledExpressionCache.h b/src/Interpreters/JIT/CompiledExpressionCache.h index 21f7c67226c..8354b33bae9 100644 --- a/src/Interpreters/JIT/CompiledExpressionCache.h +++ b/src/Interpreters/JIT/CompiledExpressionCache.h @@ -19,7 +19,7 @@ public: size_t getCompiledExpressionSize() const { return compiled_expression_size; } - virtual ~CompiledExpressionCacheEntry() {} + virtual ~CompiledExpressionCacheEntry() = default; private: diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index fb8dec665b4..f50a122f9a2 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -357,27 +357,60 @@ static void compileMergeAggregatesStates(llvm::Module & module, const std::vecto llvm::IRBuilder<> b(module.getContext()); auto * aggregate_data_place_type = b.getInt8Ty()->getPointerTo(); - auto * merge_aggregates_states_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_place_type, aggregate_data_place_type }, false); - auto * merge_aggregates_states_func = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module); + auto * aggregate_data_places_type = aggregate_data_place_type->getPointerTo(); + auto * size_type = b.getInt64Ty(); + + auto * merge_aggregates_states_func_declaration + = llvm::FunctionType::get(b.getVoidTy(), {aggregate_data_places_type, aggregate_data_places_type, size_type}, false); + auto * merge_aggregates_states_func + = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module); auto * arguments = merge_aggregates_states_func->args().begin(); - llvm::Value * aggregate_data_place_dst_arg = arguments++; - llvm::Value * aggregate_data_place_src_arg = arguments++; + llvm::Value * aggregate_data_places_dst_arg = arguments++; + llvm::Value * aggregate_data_places_src_arg = arguments++; + llvm::Value * aggregate_places_size_arg = arguments++; auto * entry = 
llvm::BasicBlock::Create(b.getContext(), "entry", merge_aggregates_states_func); b.SetInsertPoint(entry); + /// Initialize loop + + auto * end = llvm::BasicBlock::Create(b.getContext(), "end", merge_aggregates_states_func); + auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", merge_aggregates_states_func); + b.CreateCondBr(b.CreateICmpEQ(aggregate_places_size_arg, llvm::ConstantInt::get(size_type, 0)), end, loop); + + b.SetInsertPoint(loop); + + /// Loop + + auto * counter_phi = b.CreatePHI(size_type, 2); + counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry); + for (const auto & function_to_compile : functions) { + auto * aggregate_data_place_dst = b.CreateLoad(aggregate_data_place_type, + b.CreateInBoundsGEP(aggregate_data_place_type->getPointerTo(), aggregate_data_places_dst_arg, counter_phi)); + auto * aggregate_data_place_src = b.CreateLoad(aggregate_data_place_type, + b.CreateInBoundsGEP(aggregate_data_place_type->getPointerTo(), aggregate_data_places_src_arg, counter_phi)); + size_t aggregate_function_offset = function_to_compile.aggregate_data_offset; - auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst_arg, aggregate_function_offset); - auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src_arg, aggregate_function_offset); + auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst, aggregate_function_offset); + auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src, aggregate_function_offset); const auto * aggregate_function_ptr = function_to_compile.function; aggregate_function_ptr->compileMerge(b, aggregate_data_place_merge_dst_with_offset, aggregate_data_place_merge_src_with_offset); } + /// End of loop + + auto * current_block = b.GetInsertBlock(); + auto * incremeted_counter = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); + counter_phi->addIncoming(incremeted_counter, current_block); + + b.CreateCondBr(b.CreateICmpEQ(incremeted_counter, aggregate_places_size_arg), end, loop); + + b.SetInsertPoint(end); b.CreateRetVoid(); } diff --git a/src/Interpreters/JIT/compileFunction.h b/src/Interpreters/JIT/compileFunction.h index fe5abe1988c..84abfa0925a 100644 --- a/src/Interpreters/JIT/compileFunction.h +++ b/src/Interpreters/JIT/compileFunction.h @@ -56,7 +56,7 @@ struct AggregateFunctionWithOffset using JITCreateAggregateStatesFunction = void (*)(AggregateDataPtr); using JITAddIntoAggregateStatesFunction = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr *); using JITAddIntoAggregateStatesFunctionSinglePlace = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr); -using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr, AggregateDataPtr); +using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr *, AggregateDataPtr *, size_t); using JITInsertAggregateStatesIntoColumnsFunction = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr *); struct CompiledAggregateFunctions diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index fb5066b2d04..1d2ebc6b456 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -18,6 +18,7 @@ class JoinSwitcher : public IJoin public: JoinSwitcher(std::shared_ptr table_join_, 
const Block & right_sample_block_); + std::string getName() const override { return "JoinSwitcher"; } const TableJoin & getTableJoin() const override { return *table_join; } /// Add block of data from right hand of JOIN into current join object. diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 67ee2a64264..33c9dfa76ca 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -303,6 +303,11 @@ ColumnPtr emptyNotNullableClone(const ColumnPtr & column) return column->cloneEmpty(); } +ColumnPtr materializeColumn(const ColumnPtr & column) +{ + return recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst())); +} + ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) { ColumnRawPtrs ptrs; @@ -311,7 +316,7 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) for (const auto & column_name : names) { auto & column = block.getByName(column_name).column; - column = recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst())); + column = materializeColumn(column); ptrs.push_back(column.get()); } @@ -326,12 +331,7 @@ ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & nam for (const auto & column_name : names) { ColumnPtr column = block.getByName(column_name).column; - - column = column->convertToFullColumnIfConst(); - column = recursiveRemoveLowCardinality(column); - column = recursiveRemoveSparse(column); - - ptrs[column_name] = column; + ptrs[column_name] = materializeColumn(column); } return ptrs; @@ -340,8 +340,7 @@ ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & nam ColumnPtr materializeColumn(const Block & block, const String & column_name) { const auto & src_column = block.getByName(column_name).column; - return recursiveRemoveLowCardinality( - recursiveRemoveSparse(src_column->convertToFullColumnIfConst())); + return materializeColumn(src_column); } Columns materializeColumns(const Block & block, const Names & names) @@ -539,7 +538,7 @@ JoinMask getColumnAsMask(const Block & block, const String & column_name) return JoinMask(const_cond->getBool(0), block.rows()); } - ColumnPtr join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst()); + ColumnPtr join_condition_col = materializeColumn(src_col.column); if (const auto * nullable_col = typeid_cast(join_condition_col.get())) { if (isNothing(assert_cast(*col_type).getNestedType())) diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index ceef1371f16..6f0c8f1bff4 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1034,7 +1034,7 @@ std::shared_ptr MergeJoin::loadRightBlock(size_t pos) const { auto load_func = [&]() -> std::shared_ptr { - TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getPath(), materializeBlock(right_sample_block)); + TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getAbsolutePath(), materializeBlock(right_sample_block)); return std::make_shared(input.block_in->read()); }; diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 03a661c5b8a..98fae1d419f 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -22,6 +22,7 @@ class MergeJoin : public IJoin public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); + std::string getName() const override { return "PartialMergeJoin"; } const TableJoin & getTableJoin() const 
override { return *table_join; } bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 24f77f7d0ba..91ba0f64e9b 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -16,7 +16,6 @@ NamesAndTypesList MetricLogElement::getNamesAndTypes() columns_with_type_and_name.emplace_back("event_date", std::make_shared()); columns_with_type_and_name.emplace_back("event_time", std::make_shared()); columns_with_type_and_name.emplace_back("event_time_microseconds", std::make_shared(6)); - columns_with_type_and_name.emplace_back("milliseconds", std::make_shared()); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) { @@ -45,7 +44,6 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); columns[column_idx++]->insert(event_time); columns[column_idx++]->insert(event_time_microseconds); - columns[column_idx++]->insert(milliseconds); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) columns[column_idx++]->insert(profile_events[i]); @@ -96,7 +94,6 @@ void MetricLog::metricThreadFunction() MetricLogElement elem; elem.event_time = std::chrono::system_clock::to_time_t(current_time); elem.event_time_microseconds = timeInMicroseconds(current_time); - elem.milliseconds = timeInMilliseconds(current_time) - timeInSeconds(current_time) * 1000; elem.profile_events.resize(ProfileEvents::end()); for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) @@ -113,7 +110,7 @@ void MetricLog::metricThreadFunction() elem.current_metrics[i] = CurrentMetrics::values[i]; } - this->add(elem); + this->add(std::move(elem)); /// We will record current time into table but align it to regular time intervals to avoid time drift. /// We may drop some time points if the server is overloaded and recording took too much time. 
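Editor's note: the MetricLog change above drops the separate `milliseconds` column from the log element; sub-second precision is still carried by `event_time_microseconds`. If the old value is ever needed, it can be reconstructed from the remaining fields. A hedged sketch mirroring the removed `timeInMilliseconds(now) - timeInSeconds(now) * 1000` expression; the helper is illustrative, not code from the patch:

    #include <cstdint>
    #include <ctime>

    /// Millisecond remainder of an event, derived from the microsecond timestamp
    /// and the whole-second event time.
    uint64_t millisecondsPart(uint64_t event_time_microseconds, std::time_t event_time)
    {
        return event_time_microseconds / 1000 - static_cast<uint64_t>(event_time) * 1000;
    }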
diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index aacdd4f49d2..a57f1cebf71 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -22,7 +22,6 @@ struct MetricLogElement { time_t event_time{}; Decimal64 event_time_microseconds{}; - UInt64 milliseconds{}; std::vector profile_events; std::vector current_metrics; diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index c95f5209760..c1838fa105c 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -39,8 +39,8 @@ public: bool canOptimize(const ASTFunction & ast_function) const { /// if GROUP BY contains the same function ORDER BY shouldn't be optimized - auto hash = ast_function.getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = ast_function.getTreeHash(); + const auto key = toString(hash); if (group_by_function_hashes.count(key)) return false; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 25c52ad8925..e50f8488cac 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -113,13 +113,14 @@ QueryTreeNodePtr prepareQueryAffectedQueryTree(const std::vector & has_index_or_projection) + const StorageInMemoryMetadata::HasDependencyCallback & has_dependency) { NameSet new_updated_columns = updated_columns; ColumnDependencies dependencies; + while (!new_updated_columns.empty()) { - auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency); new_updated_columns.clear(); for (const auto & dependency : new_dependencies) { @@ -292,9 +293,14 @@ bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const return data && data->getSettings()->materialize_ttl_recalculate_only; } -bool MutationsInterpreter::Source::hasIndexOrProjection(const String & file_name) const +bool MutationsInterpreter::Source::hasSecondaryIndex(const String & name) const { - return part && part->checksums.has(file_name); + return part && part->hasSecondaryIndex(name); +} + +bool MutationsInterpreter::Source::hasProjection(const String & name) const +{ + return part && part->hasProjection(name); } static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) @@ -533,13 +539,24 @@ void MutationsInterpreter::prepare(bool dry_run) validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); } - std::function has_index_or_projection - = [&](const String & file_name) { return source.hasIndexOrProjection(file_name); }; + StorageInMemoryMetadata::HasDependencyCallback has_dependency = + [&](const String & name, ColumnDependency::Kind kind) + { + if (kind == ColumnDependency::PROJECTION) + return source.hasProjection(name); + + if (kind == ColumnDependency::SKIP_INDEX) + return source.hasSecondaryIndex(name); + + return true; + }; if (settings.recalculate_dependencies_of_updated_columns) - dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_index_or_projection); + dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_dependency); + bool has_alter_delete = false; std::vector read_columns; + /// First, break a sequence of commands into stages. 
for (auto & command : commands) { @@ -558,6 +575,7 @@ void MutationsInterpreter::prepare(bool dry_run) predicate = makeASTFunction("isZeroOrNull", predicate); stages.back().filters.push_back(predicate); + has_alter_delete = true; } else if (command.type == MutationCommand::UPDATE) { @@ -692,8 +710,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (it == std::cend(indices_desc)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown index: {}", command.index_name); - if (!source.hasIndexOrProjection("skp_idx_" + it->name + ".idx") - && !source.hasIndexOrProjection("skp_idx_" + it->name + ".idx2")) + if (!source.hasSecondaryIndex(it->name)) { auto query = (*it).expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(query, all_columns); @@ -707,7 +724,7 @@ void MutationsInterpreter::prepare(bool dry_run) { mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); - if (!source.hasIndexOrProjection(projection.getDirectoryName())) + if (!source.hasProjection(projection.name)) { for (const auto & column : projection.required_columns) dependencies.emplace(column, ColumnDependency::PROJECTION); @@ -731,8 +748,9 @@ void MutationsInterpreter::prepare(bool dry_run) { // just recalculate ttl_infos without remove expired data auto all_columns_vec = all_columns.getNames(); - auto new_dependencies = metadata_snapshot->getColumnDependencies( - NameSet(all_columns_vec.begin(), all_columns_vec.end()), false, has_index_or_projection); + auto all_columns_set = NameSet(all_columns_vec.begin(), all_columns_vec.end()); + auto new_dependencies = metadata_snapshot->getColumnDependencies(all_columns_set, false, has_dependency); + for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::TTL_EXPRESSION) @@ -757,8 +775,8 @@ void MutationsInterpreter::prepare(bool dry_run) } auto all_columns_vec = all_columns.getNames(); - auto all_dependencies = getAllColumnDependencies( - metadata_snapshot, NameSet(all_columns_vec.begin(), all_columns_vec.end()), has_index_or_projection); + auto all_columns_set = NameSet(all_columns_vec.begin(), all_columns_vec.end()); + auto all_dependencies = getAllColumnDependencies(metadata_snapshot, all_columns_set, has_dependency); for (const auto & dependency : all_dependencies) { @@ -767,7 +785,7 @@ void MutationsInterpreter::prepare(bool dry_run) } /// Recalc only skip indices and projections of columns which could be updated by TTL. 
- auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency); for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION) @@ -861,30 +879,44 @@ void MutationsInterpreter::prepare(bool dry_run) for (const auto & index : metadata_snapshot->getSecondaryIndices()) { - if (source.hasIndexOrProjection("skp_idx_" + index.name + ".idx") || source.hasIndexOrProjection("skp_idx_" + index.name + ".idx2")) + if (!source.hasSecondaryIndex(index.name)) + continue; + + if (has_alter_delete) { - const auto & index_cols = index.expression->getRequiredColumns(); - bool changed = std::any_of( - index_cols.begin(), - index_cols.end(), - [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); - if (changed) - materialized_indices.insert(index.name); + materialized_indices.insert(index.name); + continue; } + + const auto & index_cols = index.expression->getRequiredColumns(); + bool changed = std::any_of( + index_cols.begin(), + index_cols.end(), + [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); + + if (changed) + materialized_indices.insert(index.name); } for (const auto & projection : metadata_snapshot->getProjections()) { - if (source.hasIndexOrProjection(projection.getDirectoryName())) + if (!source.hasProjection(projection.name)) + continue; + + if (has_alter_delete) { - const auto & projection_cols = projection.required_columns; - bool changed = std::any_of( - projection_cols.begin(), - projection_cols.end(), - [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); - if (changed) - materialized_projections.insert(projection.name); + materialized_projections.insert(projection.name); + continue; } + + const auto & projection_cols = projection.required_columns; + bool changed = std::any_of( + projection_cols.begin(), + projection_cols.end(), + [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); + + if (changed) + materialized_projections.insert(projection.name); } /// Stages might be empty when we materialize skip indices or projections which don't add any diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index d783b503531..9b4caaae231 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -120,7 +120,8 @@ public: bool supportsLightweightDelete() const; bool hasLightweightDeleteMask() const; bool materializeTTLRecalculateOnly() const; - bool hasIndexOrProjection(const String & file_name) const; + bool hasSecondaryIndex(const String & name) const; + bool hasProjection(const String & name) const; void read( Stage & first_stage, diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.cpp b/src/Interpreters/MutationsNonDeterministicHelpers.cpp index 46c203e15bc..119759265ef 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.cpp +++ b/src/Interpreters/MutationsNonDeterministicHelpers.cpp @@ -63,6 +63,71 @@ public: using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; using FirstNonDeterministicFunctionData = FirstNonDeterministicFunctionMatcher::Data; +/// Executes and replaces with literals +/// non-deterministic functions in query. 
+/// Similar to ExecuteScalarSubqueriesVisitor. +class ExecuteNonDeterministicConstFunctionsMatcher +{ +public: + + struct Data + { + ContextPtr context; + std::optional max_literal_size; + }; + + static bool needChildVisit(const ASTPtr & ast, const ASTPtr & /*child*/) + { + /// Do not visit subqueries because they are executed separately. + return !ast->as(); + } + + static void visit(ASTPtr & ast, const Data & data) + { + if (auto * function = ast->as()) + visit(*function, ast, data); + } + + static void visit(ASTFunction & function, ASTPtr & ast, const Data & data) + { + if (!FunctionFactory::instance().has(function.name)) + return; + + /// It makes sense to execute functions which are deterministic + /// in scope of query because they are usually constant expressions. + auto builder = FunctionFactory::instance().get(function.name, data.context); + if (builder->isDeterministic() || !builder->isDeterministicInScopeOfQuery()) + return; + + Field field; + DataTypePtr type; + + try + { + std::tie(field, type) = evaluateConstantExpression(ast, data.context); + } + catch (...) + { + /// An exception can be thrown if the expression is not constant. + /// It's ok in that context and we just do nothing in that case. + /// It's bad pattern but it's quite hard to implement it in another way. + return; + } + + auto column = type->createColumn(); + column->insert(field); + + Block scalar{{std::move(column), type, "_constant"}}; + if (worthConvertingScalarToLiteral(scalar, data.max_literal_size)) + { + auto literal = std::make_unique(std::move(field)); + ast = addTypeConversionToAST(std::move(literal), type->getName()); + } + } +}; + +using ExecuteNonDeterministicConstFunctionsVisitor = InDepthNodeVisitor; + } FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const MutationCommand & command, ContextPtr context) @@ -97,4 +162,51 @@ FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const Muta return {}; } +ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, ContextPtr context) +{ + const auto & settings = context->getSettingsRef(); + if (!settings.mutations_execute_subqueries_on_initiator + && !settings.mutations_execute_nondeterministic_on_initiator) + return nullptr; + + auto query = alter_command.clone(); + auto & new_alter_command = *query->as(); + + if (settings.mutations_execute_subqueries_on_initiator) + { + Scalars scalars; + Scalars local_scalars; + + ExecuteScalarSubqueriesVisitor::Data data{ + WithContext{context}, + /*subquery_depth=*/ 0, + scalars, + local_scalars, + /*only_analyze=*/ false, + /*is_create_parameterized_view=*/ false, + /*replace_only_to_literals=*/ true, + settings.mutations_max_literal_size_to_replace}; + + ExecuteScalarSubqueriesVisitor visitor(data); + if (new_alter_command.update_assignments) + visitor.visit(new_alter_command.update_assignments); + if (new_alter_command.predicate) + visitor.visit(new_alter_command.predicate); + } + + if (settings.mutations_execute_nondeterministic_on_initiator) + { + ExecuteNonDeterministicConstFunctionsVisitor::Data data{ + context, settings.mutations_max_literal_size_to_replace}; + + ExecuteNonDeterministicConstFunctionsVisitor visitor(data); + if (new_alter_command.update_assignments) + visitor.visit(new_alter_command.update_assignments); + if (new_alter_command.predicate) + visitor.visit(new_alter_command.predicate); + } + + return query; +} + } diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.h b/src/Interpreters/MutationsNonDeterministicHelpers.h 
index eb3a3b1c7a4..5d9aa9752f1 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.h +++ b/src/Interpreters/MutationsNonDeterministicHelpers.h @@ -7,6 +7,7 @@ namespace DB { struct MutationCommand; +class ASTAlterCommand; struct FirstNonDeterministicFunctionResult { @@ -18,4 +19,9 @@ struct FirstNonDeterministicFunctionResult /// may also be non-deterministic in expressions of mutation command. FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const MutationCommand & command, ContextPtr context); +/// Executes non-deterministic functions and subqueries in expressions of mutation +/// command and replaces them to the literals with a result of expressions. +/// Returns rewritten query if expressions were replaced, nullptr otherwise. +ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, ContextPtr context); + } diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index 6a9251cec49..dd205ae6508 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -118,35 +118,37 @@ void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction {"greaterOrEquals", "lessOrEquals"}, }; - if (!swap_relations.contains(function.name)) return; + if (!swap_relations.contains(function.name)) + return; - if (!function.arguments || function.arguments->children.size() != 2) return; + if (!function.arguments || function.arguments->children.size() != 2) + return; size_t func_id = function.arguments->children.size(); for (size_t i = 0; i < function.arguments->children.size(); i++) - { if (const auto * func = function.arguments->children[i]->as()) - { func_id = i; - } - } - if (func_id == function.arguments->children.size()) return; + if (func_id == function.arguments->children.size()) + return; size_t literal_id = 1 - func_id; const auto * literal = function.arguments->children[literal_id]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64) return; + if (!literal || literal->value.getType() != Field::Types::UInt64) + return; String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); const auto * ast_func = function.arguments->children[func_id]->as(); /// Currently we only handle single-argument functions. 
- if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return; + if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) + return; const auto * column_id = ast_func->arguments->children.at(0)->as(); - if (!column_id) return; + if (!column_id) + return; auto pos = IdentifierSemantic::getMembership(*column_id); if (!pos) @@ -158,21 +160,30 @@ void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction return; auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName()); - if (!data_type_and_name) return; + if (!data_type_and_name) + return; + + const auto column_type = data_type_and_name->type; + if (!column_type || (!isDateOrDate32(*column_type) && !isDateTime(*column_type) && !isDateTime64(*column_type))) + return; const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context); - if (!converter) return; + if (!converter) + return; ColumnsWithTypeAndName args; - args.emplace_back(data_type_and_name->type, "tmp"); + args.emplace_back(column_type, "tmp"); auto converter_base = converter->build(args); - if (!converter_base || !converter_base->hasInformationAboutPreimage()) return; + if (!converter_base || !converter_base->hasInformationAboutPreimage()) + return; - auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value); - if (!preimage_range) return; + auto preimage_range = converter_base->getPreimage(*column_type, literal->value); + if (!preimage_range) + return; const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range); - if (!new_ast) return; + if (!new_ast) + return; ast = new_ast; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 881fcae4de6..d0344468153 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -107,6 +107,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"table_uuid", std::make_shared()}, {"part_name", std::make_shared()}, {"partition_id", std::make_shared()}, + {"partition", std::make_shared()}, {"part_type", std::make_shared()}, {"disk_name", std::make_shared()}, {"path_on_disk", std::make_shared()}, @@ -157,6 +158,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table_uuid); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); + columns[i++]->insert(partition); columns[i++]->insert(part_type.toString()); columns[i++]->insert(disk_name); columns[i++]->insert(path_on_disk); @@ -229,6 +231,10 @@ bool PartLog::addNewParts( elem.table_name = table_id.table_name; elem.table_uuid = table_id.uuid; elem.partition_id = part->info.partition_id; + { + WriteBufferFromString out(elem.partition); + part->partition.serializeText(part->storage, out, {}); + } elem.part_name = part->name; elem.disk_name = part->getDataPartStorage().getDiskName(); elem.path_on_disk = part->getDataPartStorage().getFullPath(); @@ -242,7 +248,7 @@ bool PartLog::addNewParts( elem.profile_counters = part_log_entry.profile_counters; - part_log->add(elem); + part_log->add(std::move(elem)); } } catch (...) 
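The type guard added to OptimizeDateOrDateTimeConverterWithPreimageMatcher above restricts the rewrite to Date/Date32/DateTime/DateTime64 columns, but the preimage idea itself is only implicit in the diff: a monotonic converter such as toYYYYMM maps a whole contiguous range of dates to one literal, so a comparison against that literal can be replaced by a raw range comparison on the column. The standalone C++20 sketch below is only an illustration of that idea under assumed semantics; the helper name is hypothetical and it is not the ClickHouse IFunction::getPreimage interface. For a predicate like toYYYYMM(d) = 202308 it computes the half-open range [2023-08-01, 2023-09-01) that the generated filter would compare d against.

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <utility>

// Hypothetical helper (not ClickHouse's IFunction::getPreimage): for a literal
// produced by toYYYYMM, e.g. 202308, return the half-open calendar range
// [first day of that month, first day of the next month).
static std::pair<std::chrono::year_month_day, std::chrono::year_month_day>
preimageOfToYYYYMM(std::uint64_t yyyymm)
{
    using namespace std::chrono;
    const year  y{static_cast<int>(yyyymm / 100)};
    const month m{static_cast<unsigned>(yyyymm % 100)};

    const year_month first = year_month{y, m};
    const year_month next  = first + months{1};

    return {year_month_day{first.year(), first.month(), day{1}},
            year_month_day{next.year(),  next.month(),  day{1}}};
}

int main()
{
    const auto [begin, end] = preimageOfToYYYYMM(202308);
    // A predicate `toYYYYMM(d) = 202308` can then be rewritten as
    // `d >= begin AND d < end`, a form that index analysis can use directly.
    std::printf("%d-%02u-%02u .. %d-%02u-%02u\n",
                static_cast<int>(begin.year()), static_cast<unsigned>(begin.month()), static_cast<unsigned>(begin.day()),
                static_cast<int>(end.year()),   static_cast<unsigned>(end.month()),   static_cast<unsigned>(end.day()));
}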
diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 843792d03a9..462314f2768 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -63,6 +63,7 @@ struct PartLogElement UUID table_uuid{UUIDHelpers::Nil}; String part_name; String partition_id; + String partition; String disk_name; String path_on_disk; diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 67822ecf440..e0551dff2ad 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -198,7 +198,11 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) CompletedPipelineExecutor executor(pipeline); executor.execute(); - set_and_key->set->checkIsCreated(); + /// SET may not be created successfully at this step because of the sub-query timeout, but if we have + /// timeout_overflow_mode set to `break`, no exception is thrown, and the executor just stops executing + /// the pipeline without setting `set_and_key->set->is_created` to true. + if (!set_and_key->set->isCreated()) + return nullptr; return set_and_key->set; } @@ -207,7 +211,7 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) String PreparedSets::toString(const PreparedSets::Hash & key, const DataTypes & types) { WriteBufferFromOwnString buf; - buf << "__set_" << key.first << "_" << key.second; + buf << "__set_" << DB::toString(key); if (!types.empty()) { buf << "("; diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index cb240f5260a..b953b8470e1 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -127,10 +127,10 @@ class PreparedSets { public: - using Hash = std::pair; + using Hash = CityHash_v1_0_2::uint128; struct Hashing { - UInt64 operator()(const Hash & key) const { return key.first ^ key.second; } + UInt64 operator()(const Hash & key) const { return key.low64 ^ key.high64; } }; using SetsFromTuple = std::unordered_map>, Hashing>; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 1503e396298..8572470abc1 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); + thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } @@ -587,8 +590,10 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even res.peak_memory_usage = thread_group->memory_tracker.getPeak(); if (get_thread_list) + { res.thread_ids = thread_group->getInvolvedThreadIds(); - + res.peak_threads_usage = thread_group->getPeakThreadsUsage(); + } if (get_profile_events) res.profile_counters = std::make_shared(thread_group->performance_counters.getPartiallyAtomicSnapshot()); } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 2eea49e1267..75a0eaa34bc 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -67,6 +67,7 @@ struct QueryStatusInfo /// Optional fields, filled by query 
std::vector thread_ids; + size_t peak_threads_usage; std::shared_ptr profile_counters; std::shared_ptr query_settings; std::string current_database; diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index e78a07bb752..14159ad3438 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -73,12 +73,5 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(output_bytes); } -ProcessorsProfileLog::ProcessorsProfileLog(ContextPtr context_, const String & database_name_, - const String & table_name_, const String & storage_def_, - size_t flush_interval_milliseconds_) - : SystemLog(context_, database_name_, table_name_, - storage_def_, flush_interval_milliseconds_) -{ -} } diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 81d58edd913..63791c0374c 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -45,12 +45,7 @@ struct ProcessorProfileLogElement class ProcessorsProfileLog : public SystemLog { public: - ProcessorsProfileLog( - ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_); + using SystemLog::SystemLog; }; } diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index bf8d060bd3c..bd421ae8e33 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -86,9 +86,16 @@ static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::Mutabl columns[i++]->insert(static_cast(snapshot.current_time)); columns[i++]->insert(static_cast(snapshot.thread_id)); columns[i++]->insert(Type::GAUGE); - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); + columns[i]->insert(snapshot.memory_usage); + + i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(static_cast(snapshot.current_time)); + columns[i++]->insert(static_cast(snapshot.thread_id)); + columns[i++]->insert(Type::GAUGE); + columns[i++]->insertData(MemoryTracker::PEAK_USAGE_EVENT_NAME, strlen(MemoryTracker::PEAK_USAGE_EVENT_NAME)); + columns[i]->insert(snapshot.peak_memory_usage); } void getProfileEvents( @@ -121,6 +128,7 @@ void getProfileEvents( group_snapshot.thread_id = 0; group_snapshot.current_time = time(nullptr); group_snapshot.memory_usage = thread_group->memory_tracker.get(); + group_snapshot.peak_memory_usage = thread_group->memory_tracker.getPeak(); auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); auto prev_group_snapshot = last_sent_snapshots.find(0); group_snapshot.counters = diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 7d9fc512d15..cc338530510 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -16,6 +16,7 @@ struct ProfileEventsSnapshot UInt64 thread_id; CountersIncrement counters; Int64 memory_usage; + Int64 peak_memory_usage; time_t current_time; }; diff --git a/src/Interpreters/QueryAliasesVisitor.cpp b/src/Interpreters/QueryAliasesVisitor.cpp index 18fb4b75365..148f1d305cc 100644 --- a/src/Interpreters/QueryAliasesVisitor.cpp +++ b/src/Interpreters/QueryAliasesVisitor.cpp @@ -20,15 +20,10 @@ namespace constexpr auto dummy_subquery_name_prefix = 
"_subquery"; - String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) + PreformattedMessage wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) { - WriteBufferFromOwnString message; - message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n"; - formatAST(*ast, message, false, true); - message << "\nand\n"; - formatAST(*prev_ast, message, false, true); - message << '\n'; - return message.str(); + return PreformattedMessage::create("Different expressions with the same alias {}:\n{}\nand\n{}\n", + backQuoteIfNeed(alias), serializeAST(*ast), serializeAST(*prev_ast)); } } @@ -127,7 +122,7 @@ void QueryAliasesMatcher::visitOther(const ASTPtr & ast, Data & data) if (!alias.empty()) { if (aliases.contains(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash()) - throw Exception::createDeprecated(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + throw Exception(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); aliases[alias] = ast; } diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index ec0315c2f95..57f3968fba1 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -41,6 +41,15 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"ExceptionWhileProcessing", static_cast(EXCEPTION_WHILE_PROCESSING)} }); + auto query_cache_usage_datatype = std::make_shared( + DataTypeEnum8::Values + { + {"Unknown", static_cast(QueryCache::Usage::Unknown)}, + {"None", static_cast(QueryCache::Usage::None)}, + {"Write", static_cast(QueryCache::Usage::Write)}, + {"Read", static_cast(QueryCache::Usage::Read)} + }); + auto low_cardinality_string = std::make_shared(std::make_shared()); auto array_low_cardinality_string = std::make_shared(low_cardinality_string); @@ -109,6 +118,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"log_comment", std::make_shared()}, {"thread_ids", std::make_shared(std::make_shared())}, + {"peak_threads_usage", std::make_shared()}, {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared())}, {"Settings", std::make_shared(low_cardinality_string, low_cardinality_string)}, @@ -126,6 +136,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"transaction_id", getTransactionIDDataType()}, + {"query_cache_usage", std::move(query_cache_usage_datatype)}, + {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, }; } @@ -219,6 +231,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(threads_array); } + columns[i++]->insert(peak_threads_usage); + if (profile_counters) { auto * column = columns[i++].get(); @@ -277,6 +291,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); + columns[i++]->insert(query_cache_usage); + if (async_read_counters) async_read_counters->dumpToMapColumn(columns[i++].get()); else diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 570d1297239..fe9b7cbdbc8 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -4,8 +4,9 @@ #include #include #include -#include +#include #include +#include #include #include #include @@ -90,12 +91,15 @@ struct QueryLogElement String log_comment; std::vector thread_ids; + UInt64 peak_threads_usage = 0; std::shared_ptr profile_counters; std::shared_ptr 
async_read_counters; std::shared_ptr query_settings; TransactionID tid; + QueryCache::Usage query_cache_usage = QueryCache::Usage::Unknown; + static std::string name() { return "QueryLog"; } static NamesAndTypesList getNamesAndTypes(); diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index e4a052388ac..46dcc6129bc 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -66,10 +66,11 @@ void ReplaceQueryParameterVisitor::visitChildren(ASTPtr & ast) const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { auto search = query_parameters.find(name); - if (search != query_parameters.end()) - return search->second; - else + if (search == query_parameters.end()) throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Substitution {} is not set", backQuote(name)); + + ++num_replaced_parameters; + return search->second; } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) @@ -131,6 +132,7 @@ void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) if (ast_identifier->children.empty()) return; + bool replaced_parameter = false; auto & name_parts = ast_identifier->name_parts; for (size_t i = 0, j = 0, size = name_parts.size(); i < size; ++i) { @@ -138,9 +140,14 @@ void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) { const auto & ast_param = ast_identifier->children[j++]->as(); name_parts[i] = getParamValue(ast_param.name); + replaced_parameter = true; } } + /// Do not touch AST if there are no parameters + if (!replaced_parameter) + return; + /// FIXME: what should this mean? if (!ast_identifier->semantic->special && name_parts.size() >= 2) ast_identifier->semantic->table = ast_identifier->name_parts.end()[-2]; diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.h b/src/Interpreters/ReplaceQueryParameterVisitor.h index dd785cd768e..7d5da7ea85b 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -20,8 +20,12 @@ public: void visit(ASTPtr & ast); + size_t getNumberOfReplacedParameters() const { return num_replaced_parameters; } + private: const NameToNameMap & query_parameters; + size_t num_replaced_parameters = 0; + const String & getParamValue(const String & name); void visitIdentifier(ASTPtr & ast); void visitQueryParameter(ASTPtr & ast); diff --git a/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp b/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp index 8d1a37647ad..64e03767c49 100644 --- a/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp +++ b/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -14,6 +15,28 @@ void RewriteArrayExistsFunctionMatcher::visit(ASTPtr & ast, Data & data) visit(*func, ast, data); } + else if (auto * join = ast->as()) + { + if (join->using_expression_list) + { + auto * it = std::find(join->children.begin(), join->children.end(), join->using_expression_list); + + visit(join->using_expression_list, data); + + if (it && *it != join->using_expression_list) + *it = join->using_expression_list; + } + + if (join->on_expression) + { + auto * it = std::find(join->children.begin(), join->children.end(), join->on_expression); + + visit(join->on_expression, data); + + if (it && *it != join->on_expression) + *it = join->on_expression; + } + } } void RewriteArrayExistsFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, 
Data &) @@ -76,4 +99,14 @@ void RewriteArrayExistsFunctionMatcher::visit(const ASTFunction & func, ASTPtr & } } +bool RewriteArrayExistsFunctionMatcher::needChildVisit(const ASTPtr & ast, const ASTPtr &) +{ + /// Children of ASTTableJoin are handled separately in visit() function + if (auto * join = ast->as()) + return false; + + return true; +} + + } diff --git a/src/Interpreters/RewriteArrayExistsFunctionVisitor.h b/src/Interpreters/RewriteArrayExistsFunctionVisitor.h index 763dd2d4df2..22c28fd005b 100644 --- a/src/Interpreters/RewriteArrayExistsFunctionVisitor.h +++ b/src/Interpreters/RewriteArrayExistsFunctionVisitor.h @@ -18,7 +18,7 @@ public: static void visit(ASTPtr & ast, Data &); static void visit(const ASTFunction &, ASTPtr & ast, Data &); - static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + static bool needChildVisit(const ASTPtr & ast, const ASTPtr &); }; using RewriteArrayExistsFunctionVisitor = InDepthNodeVisitor; diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp new file mode 100644 index 00000000000..7445068207a --- /dev/null +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -0,0 +1,163 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +using Aliases = std::unordered_map; + +namespace +{ + +bool matchFnUniq(String func_name) +{ + auto name = Poco::toLower(func_name); + return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" + || name == "uniqCombined64"; +} + +bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, const Aliases & alias) +{ + if (lhs->getTreeHash() == rhs->getTreeHash()) + { + return true; + } + else + { + auto * lhs_idf = lhs->as(); + auto * rhs_idf = rhs->as(); + if (lhs_idf && rhs_idf) + { + /// compound identifiers, such as: + if (lhs_idf->shortName() == rhs_idf->shortName()) + return true; + + /// translate alias + if (alias.find(lhs_idf->shortName()) != alias.end()) + lhs_idf = alias.find(lhs_idf->shortName())->second->as(); + + if (alias.find(rhs_idf->shortName()) != alias.end()) + rhs_idf = alias.find(rhs_idf->shortName())->second->as(); + + if (lhs_idf->shortName() == rhs_idf->shortName()) + return true; + } + } + return false; +} + +bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) +{ + if (!lhs || !rhs) + return false; + if (lhs->children.size() != rhs->children.size()) + return false; + for (size_t i = 0; i < lhs->children.size(); i++) + { + if (!expressionEquals(lhs->children[i], rhs->children[i], alias)) + return false; + } + return true; +} + +/// Test whether lhs contains all expressions in rhs. 
+bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) +{ + if (!lhs || !rhs) + return false; + if (lhs->children.size() < rhs->children.size()) + return false; + for (const auto & re : rhs->children) + { + auto predicate = [&re, &alias](ASTPtr & le) { return expressionEquals(le, re, alias); }; + if (std::find_if(lhs->children.begin(), lhs->children.end(), predicate) == lhs->children.end()) + return false; + } + return true; +} + +} + +void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) +{ + auto * selectq = ast->as(); + if (!selectq || !selectq->tables() || selectq->tables()->children.size() != 1) + return; + auto expr_list = selectq->select(); + if (!expr_list || expr_list->children.size() != 1) + return; + auto * func = expr_list->children[0]->as(); + if (!func || !matchFnUniq(func->name)) + return; + if (selectq->tables()->as()->children[0]->as()->children.size() != 1) + return; + auto * table_expr = selectq->tables() + ->as() + ->children[0] + ->as() + ->children[0] + ->as(); + if (!table_expr || table_expr->children.size() != 1 || !table_expr->subquery) + return; + auto * subquery = table_expr->subquery->as(); + if (!subquery) + return; + auto * sub_selectq = subquery->children[0] + ->as()->children[0] + ->as()->children[0] + ->as(); + if (!sub_selectq) + return; + auto sub_expr_list = sub_selectq->select(); + if (!sub_expr_list) + return; + + /// collect subquery select expressions alias + Aliases alias; + for (const auto & expr : sub_expr_list->children) + { + if (!expr->tryGetAlias().empty()) + alias.insert({expr->tryGetAlias(), expr}); + } + + /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' + auto match_subquery_with_distinct = [&]() -> bool + { + if (!sub_selectq->distinct) + return false; + /// uniq expression list == subquery group by expression list + if (!expressionListEquals(func->children[0]->as(), sub_expr_list->as(), alias)) + return false; + return true; + }; + + /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' + auto match_subquery_with_group_by = [&]() -> bool + { + auto group_by = sub_selectq->groupBy(); + if (!group_by) + return false; + /// uniq expression list == subquery group by expression list + if (!expressionListEquals(func->children[0]->as(), group_by->as(), alias)) + return false; + /// subquery select expression list must contain all columns in uniq expression list + if (!expressionListContainsAll(sub_expr_list->as(), func->children[0]->as(), alias)) + return false; + return true; + }; + + if (match_subquery_with_distinct() || match_subquery_with_group_by()) + expr_list->children[0] = makeASTFunction("count"); +} + +} diff --git a/src/Interpreters/RewriteUniqToCountVisitor.h b/src/Interpreters/RewriteUniqToCountVisitor.h new file mode 100644 index 00000000000..94528ccf2ee --- /dev/null +++ b/src/Interpreters/RewriteUniqToCountVisitor.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include "Interpreters/TreeRewriter.h" + +namespace DB +{ + +class ASTFunction; + +/** Optimize `uniq` into `count` over subquery. + * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to + * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' + * + * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to + * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' + * + * Note that we can rewrite all uniq variants except uniqUpTo. 
+ */ +class RewriteUniqToCountMatcher +{ +public: + struct Data {}; + static void visit(ASTPtr & ast, Data &); + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } +}; + +using RewriteUniqToCountVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 0fbcfc9e6a1..84d31bae13f 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -14,7 +15,6 @@ #include #include -#include #include #include #include @@ -24,6 +24,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int INVALID_SETTING_VALUE; +} + namespace { @@ -52,19 +57,23 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics( : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) , WithContext(global_context_) , heavy_metric_update_period(heavy_metrics_update_period_seconds) -{} +{ + /// sanity check + if (update_period_seconds == 0 || heavy_metrics_update_period_seconds == 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero"); +} void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) { if (auto mark_cache = getContext()->getMarkCache()) { - new_values["MarkCacheBytes"] = { mark_cache->weight(), "Total size of mark cache in bytes" }; + new_values["MarkCacheBytes"] = { mark_cache->sizeInBytes(), "Total size of mark cache in bytes" }; new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; } if (auto uncompressed_cache = getContext()->getUncompressedCache()) { - new_values["UncompressedCacheBytes"] = { uncompressed_cache->weight(), + new_values["UncompressedCacheBytes"] = { uncompressed_cache->sizeInBytes(), "Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided." }; new_values["UncompressedCacheCells"] = { uncompressed_cache->count(), "Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." }; @@ -72,13 +81,13 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values if (auto index_mark_cache = getContext()->getIndexMarkCache()) { - new_values["IndexMarkCacheBytes"] = { index_mark_cache->weight(), "Total size of mark cache for secondary indices in bytes." }; + new_values["IndexMarkCacheBytes"] = { index_mark_cache->sizeInBytes(), "Total size of mark cache for secondary indices in bytes." }; new_values["IndexMarkCacheFiles"] = { index_mark_cache->count(), "Total number of mark files cached in the mark cache for secondary indices." }; } if (auto index_uncompressed_cache = getContext()->getIndexUncompressedCache()) { - new_values["IndexUncompressedCacheBytes"] = { index_uncompressed_cache->weight(), + new_values["IndexUncompressedCacheBytes"] = { index_uncompressed_cache->sizeInBytes(), "Total size of uncompressed cache in bytes for secondary indices. Uncompressed cache does not usually improve the performance and should be mostly avoided." }; new_values["IndexUncompressedCacheCells"] = { index_uncompressed_cache->count(), "Total number of entries in the uncompressed cache for secondary indices. 
Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." }; @@ -92,6 +101,12 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values " The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."}; } + if (auto query_cache = getContext()->getQueryCache()) + { + new_values["QueryCacheBytes"] = { query_cache->sizeInBytes(), "Total size of the query cache in bytes." }; + new_values["QueryCacheEntries"] = { query_cache->count(), "Total number of entries in the query cache." }; + } + { auto caches = FileCacheFactory::instance().getAll(); size_t total_bytes = 0; @@ -109,18 +124,10 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values "Total number of cached file segments in the `cache` virtual filesystem. This cache is hold on disk." }; } -#if USE_ROCKSDB - if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache()) - { - new_values["MergeTreeMetadataCacheSize"] = { metadata_cache->getEstimateNumKeys(), - "The size of the metadata cache for tables. This cache is experimental and not used in production." }; - } -#endif - #if USE_EMBEDDED_COMPILER if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache()) { - new_values["CompiledExpressionCacheBytes"] = { compiled_expression_cache->weight(), + new_values["CompiledExpressionCacheBytes"] = { compiled_expression_cache->sizeInBytes(), "Total bytes used for the cache of JIT-compiled code." }; new_values["CompiledExpressionCacheCount"] = { compiled_expression_cache->count(), "Total entries in the cache of JIT-compiled code." }; @@ -236,15 +243,26 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values size_t total_number_of_rows = 0; size_t total_number_of_parts = 0; + size_t total_number_of_tables_system = 0; + + size_t total_number_of_bytes_system = 0; + size_t total_number_of_rows_system = 0; + size_t total_number_of_parts_system = 0; + for (const auto & db : databases) { /// Check if database can contain MergeTree tables if (!db.second->canContainMergeTreeTables()) continue; + bool is_system = db.first == DatabaseCatalog::SYSTEM_DATABASE; + for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) { ++total_number_of_tables; + if (is_system) + ++total_number_of_tables_system; + const auto & table = iterator->table(); if (!table) continue; @@ -254,9 +272,21 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values const auto & settings = getContext()->getSettingsRef(); calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountAndSizeForPartition().first); - total_number_of_bytes += table_merge_tree->totalBytes(settings).value(); - total_number_of_rows += table_merge_tree->totalRows(settings).value(); - total_number_of_parts += table_merge_tree->getActivePartsCount(); + + size_t bytes = table_merge_tree->totalBytes(settings).value(); + size_t rows = table_merge_tree->totalRows(settings).value(); + size_t parts = table_merge_tree->getActivePartsCount(); + + total_number_of_bytes += bytes; + total_number_of_rows += rows; + total_number_of_parts += parts; + + if (is_system) + { + total_number_of_bytes_system += bytes; + total_number_of_rows_system += rows; + total_number_of_parts_system += parts; + } } if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) @@ -310,6 +340,12 @@ void 
ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values new_values["TotalRowsOfMergeTreeTables"] = { total_number_of_rows, "Total amount of rows (records) stored in all tables of MergeTree family." }; new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family." " Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key." }; + + new_values["NumberOfTablesSystem"] = { total_number_of_tables_system, "Total number of tables in the system database on the server stored in tables of MergeTree family."}; + + new_values["TotalBytesOfMergeTreeTablesSystem"] = { total_number_of_bytes_system, "Total amount of bytes (compressed, including data and indices) stored in tables of MergeTree family in the system database." }; + new_values["TotalRowsOfMergeTreeTablesSystem"] = { total_number_of_rows_system, "Total amount of rows (records) stored in tables of MergeTree family in the system database." }; + new_values["TotalPartsOfMergeTreeTablesSystem"] = { total_number_of_parts_system, "Total amount of data parts in tables of MergeTree family in the system database." }; } #if USE_NURAFT diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 97b056cfc32..439bf6056ba 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -3,11 +3,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -200,7 +202,6 @@ private: LOG_TEST(log, "Schedule closing session with session_id: {}, user_id: {}", session.key.second, session.key.first); - } void cleanThread() @@ -240,7 +241,7 @@ private: if (session != sessions.end() && session->second->close_cycle <= current_cycle) { - if (!session->second.unique()) + if (session->second.use_count() != 1) { LOG_TEST(log, "Delay closing session with session_id: {}, user_id: {}", key.second, key.first); @@ -298,9 +299,9 @@ Session::~Session() if (notified_session_log_about_login) { + LOG_DEBUG(log, "{} Logout, user_id: {}", toString(auth_id), toString(*user_id)); if (auto session_log = getSessionLog()) { - /// TODO: We have to ensure that the same info is added to the session log on a LoginSuccess event and on the corresponding Logout event. 
session_log->addLogOut(auth_id, user, getClientInfo()); } } @@ -319,6 +320,7 @@ AuthenticationType Session::getAuthenticationTypeOrLogInFailure(const String & u } catch (const Exception & e) { + LOG_ERROR(log, "{} Authentication failed with error: {}", toString(auth_id), e.what()); if (auto session_log = getSessionLog()) session_log->addLoginFailure(auth_id, getClientInfo(), user_name, e); @@ -336,6 +338,9 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So if (session_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "If there is a session context it must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before authentication finish"); + auto address = address_; if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) address = Poco::Net::SocketAddress{"127.0.0.1", 0}; @@ -490,6 +495,8 @@ ContextMutablePtr Session::makeSessionContext() throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating session context with user_id: {}", toString(auth_id), toString(*user_id)); @@ -503,13 +510,19 @@ ContextMutablePtr Session::makeSessionContext() prepared_client_info.reset(); /// Set user information for the new context: current profiles, roles, access rights. - if (user_id) - new_session_context->setUser(*user_id); + new_session_context->setUser(*user_id); /// Session context is ready. session_context = new_session_context; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + {}, + session_context->getSettingsRef().max_sessions_for_user); + + recordLoginSucess(session_context); + return session_context; } @@ -521,6 +534,8 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", toString(auth_id), session_name_, toString(*user_id)); @@ -541,9 +556,23 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); + auto access = new_session_context->getAccess(); + UInt64 max_sessions_for_user = 0; /// Set user information for the new context: current profiles, roles, access rights. 
- if (user_id && !new_session_context->getAccess()->tryGetUser()) + if (!access->tryGetUser()) + { new_session_context->setUser(*user_id); + max_sessions_for_user = new_session_context->getSettingsRef().max_sessions_for_user; + } + else + { + // Always get setting from profile + // profile can be changed by ALTER PROFILE during single session + auto settings = access->getDefaultSettings(); + const Field * max_session_for_user_field = settings.tryGet("max_sessions_for_user"); + if (max_session_for_user_field) + max_sessions_for_user = max_session_for_user_field->safeGet(); + } /// Session context is ready. session_context = std::move(new_session_context); @@ -551,6 +580,13 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: named_session_created = new_named_session_created; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + { session_name_ }, + max_sessions_for_user); + + recordLoginSucess(session_context); + return session_context; } @@ -585,7 +621,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (auto query_context_user = query_context->getAccess()->tryGetUser()) { - LOG_DEBUG(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", + LOG_TRACE(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", toString(auth_id), from_session_context ? "session" : "global", toString(*user_id), @@ -624,24 +660,38 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (user_id) user = query_context->getUser(); - if (!notified_session_log_about_login) - { - if (auto session_log = getSessionLog()) - { - session_log->addLoginSuccess( - auth_id, - named_session ? std::optional(named_session->key.second) : std::nullopt, - *query_context, - user); - - notified_session_log_about_login = true; - } - } + /// Interserver does not create session context + recordLoginSucess(query_context); return query_context; } +void Session::recordLoginSucess(ContextPtr login_context) const +{ + if (notified_session_log_about_login) + return; + + if (!login_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session or query context must be created"); + + if (auto session_log = getSessionLog()) + { + const auto & settings = login_context->getSettingsRef(); + const auto access = login_context->getAccess(); + + session_log->addLoginSuccess(auth_id, + named_session ? 
named_session->key.second : "", + settings, + access, + getClientInfo(), + user); + } + + notified_session_log_about_login = true; +} + + void Session::releaseSessionID() { if (!named_session) diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 36f811ccd24..81ef987b428 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -96,6 +97,8 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; + void recordLoginSucess(ContextPtr login_context) const; + mutable bool notified_session_log_about_login = false; const UUID auth_id; @@ -113,6 +116,8 @@ private: std::shared_ptr named_session; bool named_session_created = false; + SessionTracker::SessionTrackerHandle session_tracker_handle; + Poco::Logger * log = nullptr; }; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index c930013e52b..b847eaf9824 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -199,12 +199,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } -void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const UserPtr & login_user) +void SessionLog::addLoginSuccess(const UUID & auth_id, + const String & session_id, + const Settings & settings, + const ContextAccessPtr & access, + const ClientInfo & client_info, + const UserPtr & login_user) { - const auto access = login_context.getAccess(); - const auto & settings = login_context.getSettingsRef(); - const auto & client_info = login_context.getClientInfo(); - DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; @@ -215,8 +216,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses } log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : ""; - if (session_id) - log_entry.session_id = *session_id; + log_entry.session_id = session_id; if (const auto roles_info = access->getRolesInfo()) log_entry.roles = roles_info->getCurrentRolesNames(); @@ -227,7 +227,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses for (const auto & s : settings.allChanged()) log_entry.settings.emplace_back(s.getName(), s.getValueString()); - add(log_entry); + add(std::move(log_entry)); } void SessionLog::addLoginFailure( @@ -243,7 +243,7 @@ void SessionLog::addLoginFailure( log_entry.client_info = info; log_entry.user_identified_with = AuthenticationType::NO_PASSWORD; - add(log_entry); + add(std::move(log_entry)); } void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info) @@ -257,7 +257,7 @@ void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, con log_entry.external_auth_server = login_user ? 
login_user->auth_data.getLDAPServerName() : ""; log_entry.client_info = client_info; - add(log_entry); + add(std::move(log_entry)); } } diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 1282ac09c4d..8757bc12270 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -20,6 +20,7 @@ enum SessionLogElementType : int8_t class ContextAccess; struct User; using UserPtr = std::shared_ptr; +using ContextAccessPtr = std::shared_ptr; /** A struct which will be inserted as row into session_log table. * @@ -72,7 +73,13 @@ class SessionLog : public SystemLog using SystemLog::SystemLog; public: - void addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const UserPtr & login_user); + void addLoginSuccess(const UUID & auth_id, + const String & session_id, + const Settings & settings, + const ContextAccessPtr & access, + const ClientInfo & client_info, + const UserPtr & login_user); + void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional & user, const Exception & reason); void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info); }; diff --git a/src/Interpreters/SessionTracker.cpp b/src/Interpreters/SessionTracker.cpp new file mode 100644 index 00000000000..4636766e288 --- /dev/null +++ b/src/Interpreters/SessionTracker.cpp @@ -0,0 +1,62 @@ +#include "SessionTracker.h" + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int USER_SESSION_LIMIT_EXCEEDED; +} + +SessionTracker::Session::Session(SessionTracker & tracker_, + const UUID& user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept + : tracker(tracker_), user_id(user_id_), session_info_iter(session_info_iter_) +{ +} + +SessionTracker::Session::~Session() +{ + tracker.stopTracking(user_id, session_info_iter); +} + +SessionTracker::SessionTrackerHandle +SessionTracker::trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + if (sessions_for_user_iter == sessions_for_user.end()) + sessions_for_user_iter = sessions_for_user.emplace(user_id, SessionInfos()).first; + + SessionInfos & session_infos = sessions_for_user_iter->second; + if (max_sessions_for_user && session_infos.size() >= max_sessions_for_user) + { + throw Exception(ErrorCodes::USER_SESSION_LIMIT_EXCEEDED, + "User {} has overflown session count {}", + toString(user_id), + max_sessions_for_user); + } + + session_infos.emplace_front(session_info); + + return std::make_unique(*this, user_id, session_infos.begin()); +} + +void SessionTracker::stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + chassert(sessions_for_user_iter != sessions_for_user.end()); + + sessions_for_user_iter->second.erase(session_info_iter); + if (sessions_for_user_iter->second.empty()) + sessions_for_user.erase(sessions_for_user_iter); +} + +} diff --git a/src/Interpreters/SessionTracker.h b/src/Interpreters/SessionTracker.h new file mode 100644 index 00000000000..0827213aeed --- /dev/null +++ b/src/Interpreters/SessionTracker.h @@ -0,0 +1,60 @@ +#pragma once + +#include "ClientInfo.h" + +#include +#include +#include +#include + +namespace DB +{ + +struct SessionInfo +{ + const String session_id; +}; + +using SessionInfos = std::list; + +using 
SessionsForUser = std::unordered_map; + +class SessionTracker; + +class SessionTracker +{ +public: + class Session : boost::noncopyable + { + public: + explicit Session(SessionTracker & tracker_, + const UUID & user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept; + + ~Session(); + + private: + friend class SessionTracker; + + SessionTracker & tracker; + const UUID user_id; + const SessionInfos::const_iterator session_info_iter; + }; + + using SessionTrackerHandle = std::unique_ptr; + + SessionTrackerHandle trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user); + +private: + /// disallow manual messing with session tracking + friend class Session; + + std::mutex mutex; + SessionsForUser sessions_for_user TSA_GUARDED_BY(mutex); + + void stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter); +}; + +} diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index e09a66a38e6..c9f48ee1be9 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -39,7 +39,7 @@ namespace TemporaryFileOnDiskHolder flushToFile(const DiskPtr & disk, const Block & header, QueryPipelineBuilder pipeline, const String & codec) { auto tmp_file = std::make_unique(disk, CurrentMetrics::TemporaryFilesForJoin); - auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getPath(), header, std::move(pipeline), codec); + auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getAbsolutePath(), header, std::move(pipeline), codec); ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, write_stat.compressed_bytes); ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, write_stat.uncompressed_bytes); @@ -267,7 +267,7 @@ SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function(file->getPath(), materializeBlock(sample_block))); + return Pipe(std::make_shared(file->getAbsolutePath(), materializeBlock(sample_block))); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 0b89b1dec26..07ef6c33d29 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,11 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +namespace ActionLocks +{ + extern const StorageActionBlockType PartsMerge; +} + namespace { class StorageWithComment : public IAST @@ -101,7 +107,6 @@ namespace namespace { -constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500; constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; /// Creates a system log with MergeTree engine using parameters from config @@ -124,18 +129,24 @@ std::shared_ptr createSystemLog( LOG_DEBUG(&Poco::Logger::get("SystemLog"), "Creating {}.{} from {}", default_database_name, default_table_name, config_prefix); - String database = config.getString(config_prefix + ".database", default_database_name); - String table = config.getString(config_prefix + ".table", default_table_name); + SystemLogSettings log_settings; - if (database != default_database_name) + log_settings.queue_settings.database = config.getString(config_prefix + ".database", default_database_name); + log_settings.queue_settings.table = config.getString(config_prefix + ".table", default_table_name); + + if (log_settings.queue_settings.database != default_database_name) { /// System tables must be 
loaded before other tables, but loading order is undefined for all databases except `system` - LOG_ERROR(&Poco::Logger::get("SystemLog"), "Custom database name for a system table specified in config." - " Table `{}` will be created in `system` database instead of `{}`", table, database); - database = default_database_name; + LOG_ERROR( + &Poco::Logger::get("SystemLog"), + "Custom database name for a system table specified in config." + " Table `{}` will be created in `system` database instead of `{}`", + log_settings.queue_settings.table, + log_settings.queue_settings.database); + + log_settings.queue_settings.database = default_database_name; } - String engine; if (config.has(config_prefix + ".engine")) { if (config.has(config_prefix + ".partition_by")) @@ -159,26 +170,26 @@ std::shared_ptr createSystemLog( "If 'engine' is specified for system table, SETTINGS parameters should " "be specified directly inside 'engine' and 'settings' setting doesn't make sense"); - engine = config.getString(config_prefix + ".engine"); + log_settings.engine = config.getString(config_prefix + ".engine"); } else { /// ENGINE expr is necessary. - engine = "ENGINE = MergeTree"; + log_settings.engine = "ENGINE = MergeTree"; /// PARTITION expr is not necessary. String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)"); if (!partition_by.empty()) - engine += " PARTITION BY (" + partition_by + ")"; + log_settings.engine += " PARTITION BY (" + partition_by + ")"; /// TTL expr is not necessary. String ttl = config.getString(config_prefix + ".ttl", ""); if (!ttl.empty()) - engine += " TTL " + ttl; + log_settings.engine += " TTL " + ttl; /// ORDER BY expr is necessary. String order_by = config.getString(config_prefix + ".order_by", TSystemLog::getDefaultOrderBy()); - engine += " ORDER BY (" + order_by + ")"; + log_settings.engine += " ORDER BY (" + order_by + ")"; /// SETTINGS expr is not necessary. /// https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#settings @@ -188,24 +199,52 @@ std::shared_ptr createSystemLog( String settings = config.getString(config_prefix + ".settings", ""); if (!storage_policy.empty() || !settings.empty()) { - engine += " SETTINGS"; + log_settings.engine += " SETTINGS"; /// If 'storage_policy' is repeated, the 'settings' configuration is preferred. if (!storage_policy.empty()) - engine += " storage_policy = " + quoteString(storage_policy); + log_settings.engine += " storage_policy = " + quoteString(storage_policy); if (!settings.empty()) - engine += (storage_policy.empty() ? " " : ", ") + settings; + log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings; } } /// Validate engine definition syntax to prevent some configuration errors. 
ParserStorageWithComment storage_parser; - parseQuery(storage_parser, engine.data(), engine.data() + engine.size(), + parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(), "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", - DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); + log_settings.queue_settings.flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", + TSystemLog::getDefaultFlushIntervalMilliseconds()); - return std::make_shared(context, database, table, engine, flush_interval_milliseconds); + log_settings.queue_settings.max_size_rows = config.getUInt64(config_prefix + ".max_size_rows", + TSystemLog::getDefaultMaxSize()); + + if (log_settings.queue_settings.max_size_rows < 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{0}.max_size_rows {1} should be 1 at least", + config_prefix, + log_settings.queue_settings.max_size_rows); + + log_settings.queue_settings.reserved_size_rows = config.getUInt64(config_prefix + ".reserved_size_rows", + TSystemLog::getDefaultReservedSize()); + + if (log_settings.queue_settings.max_size_rows < log_settings.queue_settings.reserved_size_rows) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "{0}.max_size_rows {1} should be greater or equal to {0}.reserved_size_rows {2}", + config_prefix, + log_settings.queue_settings.max_size_rows, + log_settings.queue_settings.reserved_size_rows); + } + + log_settings.queue_settings.buffer_size_rows_flush_threshold = config.getUInt64(config_prefix + ".buffer_size_rows_flush_threshold", + log_settings.queue_settings.max_size_rows / 2); + + log_settings.queue_settings.notify_flush_on_crash = config.getBool(config_prefix + ".flush_on_crash", + TSystemLog::shouldNotifyFlushOnCrash()); + + log_settings.queue_settings.turn_off_logger = TSystemLog::shouldTurnOffLogger(); + + return std::make_shared(context, log_settings); } @@ -249,6 +288,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf global_context, "system", "transactions_info_log", config, "transactions_info_log"); processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log"); asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log"); + backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log"); if (query_log) logs.emplace_back(query_log.get()); @@ -287,6 +327,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf logs.emplace_back(filesystem_read_prefetches_log.get()); if (asynchronous_insert_log) logs.emplace_back(asynchronous_insert_log.get()); + if (backup_log) + logs.emplace_back(backup_log.get()); try { @@ -325,23 +367,25 @@ void SystemLogs::shutdown() log->shutdown(); } +void SystemLogs::handleCrash() +{ + for (auto & log : logs) + log->handleCrash(); +} template SystemLog::SystemLog( ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_, + const SystemLogSettings & settings_, std::shared_ptr> queue_) - : Base(database_name_ + "." 
+ table_name_, flush_interval_milliseconds_, queue_) + : Base(settings_.queue_settings, queue_) , WithContext(context_) - , log(&Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")")) - , table_id(database_name_, table_name_) - , storage_def(storage_def_) + , log(&Poco::Logger::get("SystemLog (" + settings_.queue_settings.database + "." + settings_.queue_settings.table + ")")) + , table_id(settings_.queue_settings.database, settings_.queue_settings.table) + , storage_def(settings_.engine) , create_query(serializeAST(*getCreateTableQuery())) { - assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE); + assert(settings_.queue_settings.database == DatabaseCatalog::SYSTEM_DATABASE); } template @@ -526,6 +570,10 @@ void SystemLog::prepareTable() rename->elements.emplace_back(std::move(elem)); + ActionLock merges_lock; + if (DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID() == UUIDHelpers::Nil) + merges_lock = table->getActionLock(ActionLocks::PartsMerge); + auto query_context = Context::createCopy(context); /// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables query_context->setSetting("check_table_dependencies", Field{false}); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 5d8bb30150d..ec04e1f4162 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -49,6 +49,7 @@ class ProcessorsProfileLog; class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class AsynchronousInsertLog; +class BackupLog; /// System logs should be destroyed in destructor of the last Context and before tables, /// because SystemLog destruction makes insert query while flushing data into underlying tables @@ -58,6 +59,7 @@ struct SystemLogs ~SystemLogs(); void shutdown(); + void handleCrash(); std::shared_ptr query_log; /// Used to log queries. std::shared_ptr query_thread_log; /// Used to log query threads. @@ -83,10 +85,18 @@ struct SystemLogs /// Used to log processors profiling std::shared_ptr processors_profile_log; std::shared_ptr asynchronous_insert_log; + /// Backup and restore events + std::shared_ptr backup_log; std::vector logs; }; +struct SystemLogSettings +{ + SystemLogQueueSettings queue_settings; + + String engine; +}; template class SystemLog : public SystemLogBase, private boost::noncopyable, WithContext @@ -103,13 +113,9 @@ public: * where N - is a minimal number from 1, for that table with corresponding name doesn't exist yet; * and new table get created - as if previous table was not exist. */ - SystemLog( - ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_, - std::shared_ptr> queue_ = nullptr); + SystemLog(ContextPtr context_, + const SystemLogSettings & settings_, + std::shared_ptr> queue_ = nullptr); /** Append a record into log. * Writing to table will be done asynchronously and in case of failure, record could be lost. 
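The new per-queue limits read in createSystemLog() follow a small set of invariants: max_size_rows must be at least 1 and no smaller than reserved_size_rows, and the flush threshold defaults to half of max_size_rows when not configured. A condensed sketch of those checks, with a hypothetical struct and placeholder defaults standing in for TSystemLog::getDefaultMaxSize()/getDefaultReservedSize():

```
#include <cstddef>
#include <stdexcept>
#include <string>

// Hypothetical struct; the defaults below are placeholders, not the real values.
struct QueueLimits
{
    size_t max_size_rows = 1024 * 1024;
    size_t reserved_size_rows = 8 * 1024;
    size_t buffer_size_rows_flush_threshold = 0; // 0 here means "derive as max_size_rows / 2"
};

void validateQueueLimits(const std::string & prefix, QueueLimits & limits)
{
    if (limits.max_size_rows < 1)
        throw std::invalid_argument(prefix + ".max_size_rows should be 1 at least");

    if (limits.max_size_rows < limits.reserved_size_rows)
        throw std::invalid_argument(
            prefix + ".max_size_rows should be greater or equal to " + prefix + ".reserved_size_rows");

    if (limits.buffer_size_rows_flush_threshold == 0)
        limits.buffer_size_rows_flush_threshold = limits.max_size_rows / 2;
}
```

In the patch itself these violations surface as ErrorCodes::BAD_ARGUMENTS exceptions rather than std::invalid_argument.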
@@ -128,8 +134,6 @@ protected: using Base::queue; private: - - /* Saving thread data */ const StorageID table_id; const String storage_def; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index cabd0be1aa3..1701889f855 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -10,6 +10,9 @@ #include #include +#include +#include +#include #include @@ -40,6 +43,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; } namespace @@ -135,7 +139,12 @@ void TableJoin::resetCollected() void TableJoin::addUsingKey(const ASTPtr & ast) { - addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast); + /** For USING key and right key AST are the same. + * Example: + * SELECT ... FROM t1 JOIN t2 USING (key) + * Both key_asts_left and key_asts_right will reference the same ASTIdentifer `key` + */ + addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast, ast); } void TableJoin::addDisjunct() @@ -146,9 +155,9 @@ void TableJoin::addDisjunct() throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); } -void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) +void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast, bool null_safe_comparison) { - addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast); + addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast, null_safe_comparison); right_key_aliases[right_table_ast->getColumnName()] = right_table_ast->getAliasOrColumnName(); } @@ -425,55 +434,180 @@ static void renameIfNeeded(String & name, const NameToNameMap & renames) name = it->second; } +static void makeColumnNameUnique(const ColumnsWithTypeAndName & source_columns, String & name) +{ + for (const auto & source_col : source_columns) + { + if (source_col.name != name) + continue; + + /// Duplicate found, slow path + NameSet names; + for (const auto & col : source_columns) + names.insert(col.name); + + String base_name = name; + for (size_t i = 0; ; ++i) + { + name = base_name + "_" + toString(i); + if (!names.contains(name)) + return; + } + } +} + +static ActionsDAGPtr createWrapWithTupleActions( + const ColumnsWithTypeAndName & source_columns, + std::unordered_set && column_names_to_wrap, + NameToNameMap & new_names) +{ + if (column_names_to_wrap.empty()) + return nullptr; + + auto actions_dag = std::make_shared(source_columns); + + FunctionOverloadResolverPtr func_builder = std::make_unique(std::make_shared()); + + for (const auto * input_node : actions_dag->getInputs()) + { + const auto & column_name = input_node->result_name; + auto it = column_names_to_wrap.find(column_name); + if (it == column_names_to_wrap.end()) + continue; + column_names_to_wrap.erase(it); + + String node_name = "__wrapNullsafe(" + column_name + ")"; + makeColumnNameUnique(source_columns, node_name); + + const auto & dst_node = actions_dag->addFunction(func_builder, {input_node}, node_name); + new_names[column_name] = dst_node.result_name; + actions_dag->addOrReplaceInOutputs(dst_node); + } + + if (!column_names_to_wrap.empty()) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Can't find columns {} in input columns [{}]", + fmt::join(column_names_to_wrap, ", "), Block(source_columns).dumpNames()); + + return 
actions_dag; +} + +/// Wrap only those keys that are nullable on both sides +std::pair TableJoin::getKeysForNullSafeComparion(const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns) +{ + std::unordered_map left_idx; + for (size_t i = 0; i < left_sample_columns.size(); ++i) + left_idx[left_sample_columns[i].name] = i; + + std::unordered_map right_idx; + for (size_t i = 0; i < right_sample_columns.size(); ++i) + right_idx[right_sample_columns[i].name] = i; + + NameSet left_keys_to_wrap; + NameSet right_keys_to_wrap; + + for (const auto & clause : clauses) + { + for (size_t i : clause.nullsafe_compare_key_indexes) + { + const auto & left_key = clause.key_names_left[i]; + const auto & right_key = clause.key_names_right[i]; + auto lit = left_idx.find(left_key); + if (lit == left_idx.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find key {} in left columns [{}]", + left_key, Block(left_sample_columns).dumpNames()); + auto rit = right_idx.find(right_key); + if (rit == right_idx.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find key {} in right columns [{}]", + right_key, Block(right_sample_columns).dumpNames()); + + if (!left_sample_columns[lit->second].type->isNullable() || !right_sample_columns[rit->second].type->isNullable()) + continue; + + left_keys_to_wrap.insert(left_key); + right_keys_to_wrap.insert(right_key); + } + } + + return {left_keys_to_wrap, right_keys_to_wrap}; +} + +static void mergeDags(ActionsDAGPtr & result_dag, ActionsDAGPtr && new_dag) +{ + if (result_dag) + result_dag->mergeInplace(std::move(*new_dag)); + else + result_dag = std::move(new_dag); +} + std::pair TableJoin::createConvertingActions( const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns) { + ActionsDAGPtr left_dag = nullptr; + ActionsDAGPtr right_dag = nullptr; + /** If the types are not equal, we need to convert them to a common type. + * Example: + * SELECT * FROM t1 JOIN t2 ON t1.a = t2.b + * Assume that t1.a is UInt16 and t2.b is Int8. The supertype for them is Int32. + * The query will be semantically transformed to: + * SELECT * FROM t1 JOIN t2 ON CAST(t1.a AS 'Int32') = CAST(t2.b AS 'Int32') + * As a result, the user will get the original columns `a` and `b` without `CAST`. 
+ * + */ + NameToNameMap left_column_rename; + NameToNameMap right_column_rename; inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE)); - - NameToNameMap left_key_column_rename; - NameToNameMap right_key_column_rename; - auto left_converting_actions = applyKeyConvertToTable( - left_sample_columns, left_type_map, left_key_column_rename, forceNullableLeft()); - auto right_converting_actions = applyKeyConvertToTable( - right_sample_columns, right_type_map, right_key_column_rename, forceNullableRight()); - + if (!left_type_map.empty() || !right_type_map.empty()) { - auto log_actions = [](const String & side, const ActionsDAGPtr & dag) - { - if (dag) - { - /// Just debug message - std::vector input_cols; - for (const auto & col : dag->getRequiredColumns()) - input_cols.push_back(col.name + ": " + col.type->getName()); - - std::vector output_cols; - for (const auto & col : dag->getResultColumns()) - output_cols.push_back(col.name + ": " + col.type->getName()); - - LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: [{}] -> [{}]", - side, fmt::join(input_cols, ", "), fmt::join(output_cols, ", ")); - } - else - { - LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: empty", side); - return; - } - }; - log_actions("Left", left_converting_actions); - log_actions("Right", right_converting_actions); + left_dag = applyKeyConvertToTable(left_sample_columns, left_type_map, JoinTableSide::Left, left_column_rename); + right_dag = applyKeyConvertToTable(right_sample_columns, right_type_map, JoinTableSide::Right, right_column_rename); } - forAllKeys(clauses, [&](auto & left_key, auto & right_key) + /** + * Similarly, when we have a null-safe comparison (a IS NOT DISTICT FROM b), + * we need to wrap keys with a non-nullable type. + * The type `tuple` can be used for this purpose, + * because value tuple(NULL) is not NULL itself (moreover it has type Tuple(Nullable(T) which is not Nullable). + * Thus, join algorithm will match keys with values tuple(NULL). + * Example: + * SELECT * FROM t1 JOIN t2 ON t1.a <=> t2.b + * This will be semantically transformed to: + * SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b) + */ + auto [left_keys_nullsafe_comparison, right_keys_nullsafe_comparison] = getKeysForNullSafeComparion( + left_dag ? left_dag->getResultColumns() : left_sample_columns, + right_dag ? right_dag->getResultColumns() : right_sample_columns); + if (!left_keys_nullsafe_comparison.empty() || !right_keys_nullsafe_comparison.empty()) { - renameIfNeeded(left_key, left_key_column_rename); - renameIfNeeded(right_key, right_key_column_rename); - return true; - }); + auto new_left_dag = applyNullsafeWrapper( + left_dag ? left_dag->getResultColumns() : left_sample_columns, + left_keys_nullsafe_comparison, JoinTableSide::Left, left_column_rename); + mergeDags(left_dag, std::move(new_left_dag)); - return {left_converting_actions, right_converting_actions}; + auto new_right_dag = applyNullsafeWrapper( + right_dag ? right_dag->getResultColumns() : right_sample_columns, + right_keys_nullsafe_comparison, JoinTableSide::Right, right_column_rename); + mergeDags(right_dag, std::move(new_right_dag)); + } + + if (forceNullableLeft()) + { + auto new_left_dag = applyJoinUseNullsConversion( + left_dag ? 
left_dag->getResultColumns() : left_sample_columns, + left_column_rename); + mergeDags(left_dag, std::move(new_left_dag)); + } + + if (forceNullableRight()) + { + auto new_right_dag = applyJoinUseNullsConversion( + right_dag ? right_dag->getResultColumns() : right_sample_columns, + right_column_rename); + mergeDags(right_dag, std::move(new_right_dag)); + } + + return {left_dag, right_dag}; } template @@ -608,30 +742,66 @@ static ActionsDAGPtr changeTypesToNullable( ActionsDAGPtr TableJoin::applyKeyConvertToTable( const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, - NameToNameMap & key_column_rename, - bool make_nullable) const + JoinTableSide table_side, + NameToNameMap & key_column_rename) { + if (type_mapping.empty()) + return nullptr; + /// Create DAG to convert key columns - ActionsDAGPtr dag_stage1 = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); + ActionsDAGPtr convert_dag = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); + applyRename(table_side, key_column_rename); + return convert_dag; +} + +ActionsDAGPtr TableJoin::applyNullsafeWrapper( + const ColumnsWithTypeAndName & cols_src, + const NameSet & columns_for_nullsafe_comparison, + JoinTableSide table_side, + NameToNameMap & key_column_rename) +{ + if (columns_for_nullsafe_comparison.empty()) + return nullptr; + + std::unordered_set column_names_to_wrap; + for (const auto & name : columns_for_nullsafe_comparison) + { + /// Take into account column renaming for type conversion + /// if we changed key `a == b` to `_CAST(a, 'UInt64') = b` we need to wrap `tuple(_CAST(a, 'UInt64')) = tuple(b)` + if (auto it = key_column_rename.find(name); it != key_column_rename.end()) + column_names_to_wrap.insert(it->second); + else + column_names_to_wrap.insert(name); + } + + /// Create DAG to wrap keys with tuple for null-safe comparison + ActionsDAGPtr null_safe_wrap_dag = createWrapWithTupleActions(cols_src, std::move(column_names_to_wrap), key_column_rename); + for (auto & clause : clauses) + { + for (size_t i : clause.nullsafe_compare_key_indexes) + { + if (table_side == JoinTableSide::Left) + renameIfNeeded(clause.key_names_left[i], key_column_rename); + else + renameIfNeeded(clause.key_names_right[i], key_column_rename); + } + } + + return null_safe_wrap_dag; +} + +ActionsDAGPtr TableJoin::applyJoinUseNullsConversion( + const ColumnsWithTypeAndName & cols_src, + const NameToNameMap & key_column_rename) +{ + /// Do not need to make nullable temporary columns that would be used only as join keys, but is not visible to user + NameSet exclude_columns; + for (const auto & it : key_column_rename) + exclude_columns.insert(it.second); /// Create DAG to make columns nullable if needed - if (make_nullable) - { - /// Do not need to make nullable temporary columns that would be used only as join keys, but is not visible to user - NameSet cols_not_nullable; - for (const auto & t : key_column_rename) - cols_not_nullable.insert(t.second); - - ColumnsWithTypeAndName input_cols = dag_stage1 ? 
dag_stage1->getResultColumns() : cols_src; - ActionsDAGPtr dag_stage2 = changeTypesToNullable(input_cols, cols_not_nullable); - - /// Merge dags if we got two ones - if (dag_stage1) - return ActionsDAG::merge(std::move(*dag_stage1), std::move(*dag_stage2)); - else - return dag_stage2; - } - return dag_stage1; + ActionsDAGPtr add_nullable_dag = changeTypesToNullable(cols_src, exclude_columns); + return add_nullable_dag; } void TableJoin::setStorageJoin(std::shared_ptr storage) @@ -674,12 +844,13 @@ void TableJoin::setRename(const String & from, const String & to) renames[from] = to; } -void TableJoin::addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast) +void TableJoin::addKey(const String & left_name, const String & right_name, + const ASTPtr & left_ast, const ASTPtr & right_ast, + bool null_safe_comparison) { - clauses.back().key_names_left.emplace_back(left_name); - key_asts_left.emplace_back(left_ast); + clauses.back().addKey(left_name, right_name, null_safe_comparison); - clauses.back().key_names_right.emplace_back(right_name); + key_asts_left.emplace_back(left_ast); key_asts_right.emplace_back(right_ast ? right_ast : left_ast); } @@ -731,6 +902,19 @@ Names TableJoin::getAllNames(JoinTableSide side) const return res; } +void TableJoin::applyRename(JoinTableSide side, const NameToNameMap & name_map) +{ + auto rename_callback = [&name_map](auto & key_name) + { + renameIfNeeded(key_name, name_map); + return true; + }; + if (side == JoinTableSide::Left) + forAllKeys(clauses, rename_callback); + else + forAllKeys(clauses, rename_callback); +} + void TableJoin::assertHasOneOnExpr() const { if (!oneDisjunct()) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 5d14a57759f..7736fbfcf5c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -51,6 +51,13 @@ public: Names key_names_left; Names key_names_right; /// Duplicating right key names are qualified + /** JOIN ON a1 == a2 AND b1 <=> b2 AND c1 == c2 AND d1 <=> d2 + * key_names_left: [a1, b1, c1, d1] + * key_names_right: [a2, b2, c2, d2] + * nullsafe_compare_key_indexes: {1, 3} + */ + std::unordered_set nullsafe_compare_key_indexes; + ASTPtr on_filter_condition_left; ASTPtr on_filter_condition_right; @@ -59,6 +66,14 @@ public: JoinOnClause() = default; + void addKey(const String & left_name, const String & right_name, bool null_safe_comparison) + { + key_names_left.push_back(left_name); + key_names_right.push_back(right_name); + if (null_safe_comparison) + nullsafe_compare_key_indexes.insert(key_names_left.size() - 1); + } + std::pair condColumnNames() const { std::pair res; @@ -177,11 +192,24 @@ private: /// Create converting actions and change key column names if required ActionsDAGPtr applyKeyConvertToTable( - const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, - NameToNameMap & key_column_rename, - bool make_nullable) const; + const ColumnsWithTypeAndName & cols_src, + const NameToTypeMap & type_mapping, + JoinTableSide table_side, + NameToNameMap & key_column_rename); - void addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast = nullptr); + ActionsDAGPtr applyNullsafeWrapper( + const ColumnsWithTypeAndName & cols_src, + const NameSet & columns_for_nullsafe_comparison, + JoinTableSide table_side, + NameToNameMap & key_column_rename); + + ActionsDAGPtr applyJoinUseNullsConversion( + const ColumnsWithTypeAndName & cols_src, + const NameToNameMap & 
key_column_rename); + + void applyRename(JoinTableSide side, const NameToNameMap & name_map); + + void addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast, bool null_safe_comparison = false); void assertHasOneOnExpr() const; @@ -189,10 +217,11 @@ private: template void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool strict); - NamesAndTypesList correctedColumnsAddedByJoin() const; - void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); + std::pair + getKeysForNullSafeComparion(const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns); + public: TableJoin() = default; @@ -271,7 +300,7 @@ public: void addDisjunct(); - void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast); + void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast, bool null_safe_comparison); /* Conditions for left/right table from JOIN ON section. * @@ -331,7 +360,7 @@ public: const ColumnsWithTypeAndName & right_sample_columns); void setAsofInequality(ASOFJoinInequality inequality) { asof_inequality = inequality; } - ASOFJoinInequality getAsofInequality() { return asof_inequality; } + ASOFJoinInequality getAsofInequality() const { return asof_inequality; } ASTPtr leftKeysList() const; ASTPtr rightKeysList() const; /// For ON syntax only @@ -371,6 +400,8 @@ public: bool isSpecialStorage() const { return !right_storage_name.empty() || right_storage_join || right_kv_storage; } std::shared_ptr getStorageKeyValue() { return right_kv_storage; } + + NamesAndTypesList correctedColumnsAddedByJoin() const; }; } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 69fef21dbab..0aa2a0b9f4a 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -235,9 +235,9 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const : parent(parent_) , header(header_) , file(std::move(file_)) - , out_writer(std::make_unique(std::make_unique(file->getPath()), header)) + , out_writer(std::make_unique(std::make_unique(file->getAbsolutePath()), header)) { - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getPath()); + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getAbsolutePath()); } TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_) @@ -365,7 +365,7 @@ void TemporaryFileStream::release() String TemporaryFileStream::getPath() const { if (file) - return file->getPath(); + return file->getAbsolutePath(); if (segment_holder && !segment_holder->empty()) return segment_holder->front().getPathInLocalCache(); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 108135c78b3..36c33a5e7cc 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -36,7 +36,6 @@ NamesAndTypesList TextLogElement::getNamesAndTypes() {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, {"event_time_microseconds", std::make_shared(6)}, - {"microseconds", std::make_shared()}, {"thread_name", std::make_shared(std::make_shared())}, {"thread_id", std::make_shared()}, @@ -62,7 +61,6 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const 
columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); - columns[i++]->insert(microseconds); columns[i++]->insertData(thread_name.data(), thread_name.size()); columns[i++]->insert(thread_id); @@ -80,15 +78,10 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(message_format_string); } -TextLog::TextLog(ContextPtr context_, const String & database_name_, - const String & table_name_, const String & storage_def_, - size_t flush_interval_milliseconds_) - : SystemLog(context_, database_name_, table_name_, - storage_def_, flush_interval_milliseconds_, getLogQueue(flush_interval_milliseconds_)) +TextLog::TextLog(ContextPtr context_, + const SystemLogSettings & settings) + : SystemLog(context_, settings, getLogQueue(settings.queue_settings)) { - // SystemLog methods may write text logs, so we disable logging for the text - // log table to avoid recursion. - log->setLevel(0); } } diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index 60ca11632aa..bfeca324fde 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -14,7 +14,6 @@ struct TextLogElement { time_t event_time{}; Decimal64 event_time_microseconds{}; - UInt32 microseconds{}; String thread_name; UInt64 thread_id{}; @@ -42,18 +41,15 @@ class TextLog : public SystemLog public: using Queue = SystemLogQueue; - TextLog( - ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_); + explicit TextLog(ContextPtr context_, const SystemLogSettings & settings); - static std::shared_ptr getLogQueue(size_t flush_interval_milliseconds) + static std::shared_ptr getLogQueue(const SystemLogQueueSettings & settings) { - static std::shared_ptr queue = std::make_shared("text_log", flush_interval_milliseconds, true); + static std::shared_ptr queue = std::make_shared(settings); return queue; } + + static consteval bool shouldTurnOffLogger() { return true; } }; } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 398bea26b87..efb8c6792bc 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -61,10 +61,27 @@ std::vector ThreadGroup::getInvolvedThreadIds() const return res; } -void ThreadGroup::linkThread(UInt64 thread_it) +size_t ThreadGroup::getPeakThreadsUsage() const { std::lock_guard lock(mutex); - thread_ids.insert(thread_it); + return peak_threads_usage; +} + + +void ThreadGroup::linkThread(UInt64 thread_id) +{ + std::lock_guard lock(mutex); + thread_ids.insert(thread_id); + + ++active_thread_count; + peak_threads_usage = std::max(peak_threads_usage, active_thread_count); +} + +void ThreadGroup::unlinkThread() +{ + std::lock_guard lock(mutex); + chassert(active_thread_count > 0); + --active_thread_count; } ThreadGroupPtr ThreadGroup::createForQuery(ContextPtr query_context_, std::function fatal_error_callback_) @@ -83,6 +100,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + 
group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) @@ -241,6 +260,8 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); + thread_group->unlinkThread(); + thread_group.reset(); query_id_from_query_context.clear(); @@ -380,12 +401,10 @@ void ThreadStatus::finalizePerformanceCounters() updatePerformanceCounters(); // We want to close perf file descriptors if the perf events were enabled for - // one query. What this code does in practice is less clear -- e.g., if I run - // 'select 1 settings metrics_perf_events_enabled = 1', I still get - // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. + // one query. bool close_perf_descriptors = true; - if (auto query_context_ptr = query_context.lock()) - close_perf_descriptors = !query_context_ptr->getSettingsRef().metrics_perf_events_enabled; + if (auto global_context_ptr = global_context.lock()) + close_perf_descriptors = !global_context_ptr->getSettingsRef().metrics_perf_events_enabled; try { @@ -408,7 +427,7 @@ void ThreadStatus::finalizePerformanceCounters() if (settings.log_queries && settings.log_query_threads) { const auto now = std::chrono::system_clock::now(); - Int64 query_duration_ms = std::chrono::duration_cast(now - query_start_time.point).count(); + Int64 query_duration_ms = std::chrono::duration_cast(now - query_start_time.point).count(); if (query_duration_ms >= settings.log_queries_min_query_duration_ms.totalMilliseconds()) { if (auto thread_log = global_context_ptr->getQueryThreadLog()) @@ -513,7 +532,7 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String } } - thread_log.add(elem); + thread_log.add(std::move(elem)); } static String getCleanQueryAst(const ASTPtr q, ContextPtr context) @@ -573,7 +592,7 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo) element.stack_trace = getExceptionStackTraceString(vinfo.exception); } - views_log->add(element); + views_log->add(std::move(element)); } void CurrentThread::attachToGroup(const ThreadGroupPtr & thread_group) diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index cb00e37df69..30fbe26d038 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -112,6 +112,9 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + UInt64 ptr; + readPODBinary(ptr, in); + ProfileEvents::Event event; readPODBinary(event, in); @@ -127,8 +130,9 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; - trace_log->add(element); + + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; + trace_log->add(std::move(element)); } } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 0408ebe504b..cd5f965a679 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,6 +38,7 @@ NamesAndTypesList 
TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, + {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c481f033a72..71aec0b50c4 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,8 +27,10 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory. + /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. Int64 size{}; + /// Allocation ptr for TraceType::MemorySample. + UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 6257e617d4a..66b23f09ba0 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -34,7 +34,7 @@ try elem.tid = tid; elem.csn = csn; elem.fillCommonFields(nullptr); - system_log->add(elem); + system_log->add(std::move(elem)); } catch (...) { @@ -409,7 +409,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { std::bernoulli_distribution fault(fault_probability_before_commit); if (fault(thread_local_rng)) - throw Coordination::Exception("Fault injected (before commit)", Coordination::Error::ZCONNECTIONLOSS); + throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (before commit)"); } /// Commit point @@ -419,7 +419,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { std::bernoulli_distribution fault(fault_probability_after_commit); if (fault(thread_local_rng)) - throw Coordination::Exception("Fault injected (after commit)", Coordination::Error::ZCONNECTIONLOSS); + throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (after commit)"); } } catch (const Coordination::Exception & e) @@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN bool removed = running_list.erase(txn->tid.getHash()); if (!removed) { - LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); + LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); abort(); } } diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index b62cd4672d8..90f5022a444 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -101,7 +101,7 @@ try elem.type = type; elem.tid = tid; elem.fillCommonFields(&context); - system_log->add(elem); + system_log->add(std::move(elem)); } catch (...) 
{ diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index aeb912ddfbb..130ce2194fd 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -249,13 +249,6 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (first_table || !data.join_using_columns.contains(column.name)) { std::string column_name = column.name; - - /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - if (!data.parameter_values.empty()) - column_name - = StorageView::replaceQueryParameterWithValue(column_name, data.parameter_values, data.parameter_types); - addIdentifier(columns, table.table, column_name); } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 6c804ad6c90..73e45fc7ea0 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -28,15 +28,11 @@ public: const TablesWithColumns & tables; std::unordered_set join_using_columns; bool has_columns; - NameToNameMap parameter_values; - NameToNameMap parameter_types; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}, const NameToNameMap & parameter_types_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) - , parameter_values(parameter_values_) - , parameter_types(parameter_types_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index fd4d2c9d846..9612a4d6c3f 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -289,13 +289,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) elems = std::move(unique_elems); } -/// Optimize duplicate ORDER BY -void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context) -{ - DuplicateOrderByVisitor::Data order_by_data{context}; - DuplicateOrderByVisitor(order_by_data).visit(query); -} - /// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select) { @@ -379,41 +372,6 @@ std::unordered_set getDistinctNames(const ASTSelectQuery & select) return names; } -/// Remove DISTINCT from query if columns are known as DISTINCT from subquery -void optimizeDuplicateDistinct(ASTSelectQuery & select) -{ - if (!select.select() || select.select()->children.empty()) - return; - - const ASTSelectQuery * subselect = getSimpleSubselect(select); - if (!subselect) - return; - - std::unordered_set distinct_names = getDistinctNames(*subselect); - std::unordered_set selected_names; - - /// Check source column names from select list (ignore aliases and table names) - for (const auto & id : select.select()->children) - { - const auto * identifier = id->as(); - if (!identifier) - return; - - const String & name = identifier->shortName(); - if (!distinct_names.contains(name)) - return; /// Not a distinct column, keep DISTINCT for it. 
- - selected_names.emplace(name); - } - - /// select columns list != distinct columns list - /// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT - if (selected_names.size() != distinct_names.size()) - return; - - select.distinct = false; -} - /// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, /// has a single argument and not an aggregate functions. void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context, @@ -451,8 +409,8 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context { for (auto & elem : set->children) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = elem->getTreeHash(); + const auto key = toString(hash); group_by_hashes.insert(key); } } @@ -461,8 +419,8 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context { for (auto & elem : group_by->children) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = elem->getTreeHash(); + const auto key = toString(hash); group_by_hashes.insert(key); } } @@ -830,17 +788,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, && !select_query->group_by_with_cube) optimizeAggregateFunctionsOfGroupByKeys(select_query, query); - /// Remove duplicate ORDER BY and DISTINCT from subqueries. - if (settings.optimize_duplicate_order_by_and_distinct) - { - optimizeDuplicateOrderBy(query, context); - - /// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge - /// TODO: disable Distributed/remote() tables only - if (!settings.distributed_group_by_no_merge) - optimizeDuplicateDistinct(*select_query); - } - /// Remove functions from ORDER BY if its argument is also in ORDER BY if (settings.optimize_redundant_functions_in_order_by) optimizeRedundantFunctionsInOrderBy(select_query, context); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index aa493a1b55d..d87ac1ed435 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -110,6 +110,9 @@ using CustomizeCountDistinctVisitor = InDepthNodeVisitor>, true>; +char countdistinctif[] = "countdistinctif"; +using CustomizeCountDistinctIfVisitor = InDepthNodeVisitor>, true>; + char in[] = "in"; using CustomizeInVisitor = InDepthNodeVisitor>, true>; @@ -299,11 +302,10 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitorinterpolate()) + { + auto & children = select_query->interpolate()->children; + if (!children.empty()) + { + NameToNameSetMap expressions; + + auto interpolate_visitor = [](const ASTPtr ast, NameSet & columns) -> void + { + auto interpolate_visitor_impl = [](const ASTPtr node, NameSet & cols, auto self) -> void + { + if (const auto * ident = node->as()) + cols.insert(ident->name()); + else if (const auto * func = node->as()) + for (const auto & elem : func->arguments->children) + self(elem, cols, self); + }; + interpolate_visitor_impl(ast, columns, interpolate_visitor_impl); + }; + + for (const auto & elem : children) + { + if (auto * interpolate = elem->as()) + { + NameSet needed_columns; + interpolate_visitor(interpolate->expr, needed_columns); + expressions.emplace(interpolate->column, std::move(needed_columns)); + } + } + + for (const auto & name : required_result_columns) + if (const auto it = expressions.find(name); it != expressions.end()) + 
required_by_interpolate.insert(it->second.begin(), it->second.end()); + } + } + ASTs new_elements; new_elements.reserve(elements.size()); @@ -401,6 +441,11 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const new_elements.push_back(elem); --it->second; } + else if (required_by_interpolate.contains(name)) + { + /// Columns required by interpolate expression are not always in the required_result_columns + new_elements.push_back(elem); + } else if (select_query->distinct || hasArrayJoin(elem)) { /// ARRAY JOIN cannot be optimized out since it may change number of rows, @@ -559,15 +604,13 @@ std::optional tryEvaluateConstCondition(ASTPtr expr, ContextPtr context) Field eval_res; DataTypePtr eval_res_type; - try { - std::tie(eval_res, eval_res_type) = evaluateConstantExpression(expr, context); - } - catch (DB::Exception &) - { - /// not a constant expression - return {}; + auto constant_expression_result = tryEvaluateConstantExpression(expr, context); + if (!constant_expression_result) + return {}; + std::tie(eval_res, eval_res_type) = std::move(constant_expression_result.value()); } + /// UInt8, maybe Nullable, maybe LowCardinality, and NULL are allowed eval_res_type = removeNullable(removeLowCardinality(eval_res_type)); if (auto which = WhichDataType(eval_res_type); !which.isUInt8() && !which.isNothing()) @@ -914,7 +957,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) /// Calculate which columns are required to execute the expression. /// Then, delete all other columns from the list of available columns. /// After execution, columns will only contain the list of columns needed to read from the table. -void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint) +bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint, bool no_throw) { /// We calculate required_source_columns with source_columns modifications and swap them on exit required_source_columns = source_columns; @@ -1133,6 +1176,8 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select ss << " '" << name << "'"; } + if (no_throw) + return false; throw Exception(PreformattedMessage{ss.str(), format_string}, ErrorCodes::UNKNOWN_IDENTIFIER); } @@ -1141,6 +1186,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select { source_column_names.insert(column.name); } + return true; } NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const @@ -1157,10 +1203,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( const SelectQueryOptions & select_options, const TablesWithColumns & tables_with_columns, const Names & required_result_columns, - std::shared_ptr table_join, - bool is_parameterized_view, - const NameToNameMap parameter_values, - const NameToNameMap parameter_types) const + std::shared_ptr table_join) const { auto * select_query = query->as(); if (!select_query) @@ -1198,7 +1241,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values, parameter_types); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); /// Optimizes logical expressions. 
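The interpolate visitor above boils down to a recursive identifier walk: identifiers are collected directly, function nodes recurse into their arguments, and the resulting set is what keeps those columns from being pruned from the SELECT list. A self-contained sketch with a toy node type (not the ClickHouse AST classes):

```
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

// Toy expression node: either an identifier (leaf) or a function with arguments.
struct ExprNode
{
    std::string identifier;                           // non-empty for identifier nodes
    std::vector<std::shared_ptr<ExprNode>> arguments; // non-empty for function nodes
};

// Collect every column name the expression depends on, recursing through function arguments.
void collectIdentifiers(const ExprNode & node, std::unordered_set<std::string> & columns)
{
    if (!node.identifier.empty())
        columns.insert(node.identifier);
    for (const auto & arg : node.arguments)
        if (arg)
            collectIdentifiers(*arg, columns);
}
```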
LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1256,15 +1299,6 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); - /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - /// the replaced column names will be used in the next steps - if (is_parameterized_view) - { - for (auto & column : result.source_columns) - column.name = StorageView::replaceQueryParameterWithValue(column.name, parameter_values, parameter_types); - } - result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames(); @@ -1362,7 +1396,9 @@ TreeRewriterResultPtr TreeRewriter::analyze( else assertNoAggregates(query, "in wrong place"); - result.collectUsedColumns(query, false, settings.query_plan_optimize_primary_key); + bool is_ok = result.collectUsedColumns(query, false, settings.query_plan_optimize_primary_key, no_throw); + if (!is_ok) + return {}; return std::make_shared(result); } @@ -1381,6 +1417,12 @@ void TreeRewriter::normalize( CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"}; CustomizeIfDistinctVisitor(data_distinct_if).visit(query); + if (settings.rewrite_count_distinct_if_with_count_distinct_implementation) + { + CustomizeCountDistinctIfVisitor::Data data_count_distinct_if{settings.count_distinct_implementation.toString() + "If"}; + CustomizeCountDistinctIfVisitor(data_count_distinct_if).visit(query); + } + ExistsExpressionVisitor::Data exists; ExistsExpressionVisitor(exists).visit(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 206a63541a6..60832f49b35 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -87,7 +87,7 @@ struct TreeRewriterResult bool add_special = true); void collectSourceColumns(bool add_special); - void collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint); + bool collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint, bool no_throw = false); Names requiredSourceColumns() const { return required_source_columns.getNames(); } const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; } NameSet getArrayJoinSourceNameSet() const; @@ -108,7 +108,10 @@ using TreeRewriterResultPtr = std::shared_ptr; class TreeRewriter : WithContext { public: - explicit TreeRewriter(ContextPtr context_) : WithContext(context_) {} + explicit TreeRewriter(ContextPtr context_, bool no_throw_ = false) + : WithContext(context_) + , no_throw(no_throw_) + {} /// Analyze and rewrite not select query TreeRewriterResultPtr analyze( @@ -128,13 +131,13 @@ public: const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, - std::shared_ptr table_join = {}, - bool is_parameterized_view = false, - const NameToNameMap parameter_values = {}, - const NameToNameMap parameter_types = {}) const; + std::shared_ptr table_join = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const 
Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); + + /// Do not throw exception from analyze on unknown identifiers, but only return nullptr. + bool no_throw = false; }; } diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 7ed7788cf1d..8a7a5024d69 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -91,34 +91,30 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { // Check the validity of offsets. - if (type == WindowFrame::FrameType::ROWS - || type == WindowFrame::FrameType::GROUPS) + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) { - if (begin_type == BoundaryType::Offset - && !((begin_offset.getType() == Field::Types::UInt64 - || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), begin_offset), - begin_offset.getType()); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), begin_offset), + begin_offset.getType()); + } - if (end_type == BoundaryType::Offset - && !((end_offset.getType() == Field::Types::UInt64 - || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), end_offset), - end_offset.getType()); - } + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), end_offset), + end_offset.getType()); } // Check relative positioning of offsets. diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 89c73117960..4e38103ac1f 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE; } @@ -281,6 +283,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Already in needed type. 
return src; } + if (which_type.isIPv4() && src.getType() == Field::Types::UInt64) + { + /// convert to UInt32 which is the underlying type for native IPv4 + return convertNumericType(src, type); + } } else if (which_type.isUUID() && src.getType() == Field::Types::UUID) { @@ -565,4 +572,39 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t return converted; } +template +static bool decimalEqualsFloat(Field field, Float64 float_value) +{ + auto decimal_field = field.get>(); + auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); + return decimal_to_float == float_value; +} + +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) +{ + Field result_value = convertFieldToType(from_value, to_type); + + if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) + { + bool is_equal = applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value); + return is_equal ? result_value : std::optional{}; + } + + if (from_value.getType() == Field::Types::Float64 && Field::isDecimal(result_value.getType())) + { + /// Convert back to Float64 and compare + if (result_value.getType() == Field::Types::Decimal32) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal64) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal128) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal256) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); + } + + return result_value; +} + } diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 91b631d0e12..7f49ea5479d 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -20,4 +20,8 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co /// Does the same, but throws ARGUMENT_OUT_OF_BOUND if value does not fall into the range. Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint = nullptr); +/// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. +/// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); + } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 921cd5ff553..6d5a0c4bdfa 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -28,7 +28,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static std::pair> getFieldAndDataTypeFromLiteral(ASTLiteral * literal) +static EvaluateConstantExpressionResult getFieldAndDataTypeFromLiteral(ASTLiteral * literal) { auto type = applyVisitor(FieldToDataType(), literal->value); /// In case of Array field nested fields can have different types. 
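The strict conversion added to convertFieldToType.cpp hinges on a round-trip check: convert, compare against the original, and fail instead of silently losing precision. A toy version of the idea, using plain doubles and a hypothetical fixed-point type (overflow handling omitted), is sketched below.

```
#include <cmath>
#include <cstdint>
#include <optional>

// A value with `scale` decimal digits after the point, e.g. 1.25 at scale 2 is stored as 125.
struct FixedPoint
{
    int64_t value = 0;
    uint32_t scale = 0;
};

// Convert only if the result reproduces the source exactly; otherwise report failure.
std::optional<FixedPoint> convertDoubleToFixedPointStrict(double source, uint32_t scale)
{
    const double factor = std::pow(10.0, scale);
    const FixedPoint result{static_cast<int64_t>(std::llround(source * factor)), scale};

    // Round-trip check: converting back must reproduce the original double exactly.
    const double round_trip = static_cast<double>(result.value) / factor;
    if (round_trip != source)
        return std::nullopt; // precision would be lost, reject the conversion
    return result;
}
```

The patch applies the same principle with FieldVisitorAccurateEquals for Decimal-to-Decimal conversions and DecimalUtils::convertTo for Float64 sources.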
@@ -39,7 +39,7 @@ static std::pair> getFieldAndDataTypeFro return {res, type}; } -std::pair> evaluateConstantExpression(const ASTPtr & node, const ContextPtr & context) +std::optional evaluateConstantExpressionImpl(const ASTPtr & node, const ContextPtr & context, bool no_throw) { if (ASTLiteral * literal = node->as()) return getFieldAndDataTypeFromLiteral(literal); @@ -67,7 +67,9 @@ std::pair> evaluateConstantExpression(co if (context->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && context->getSettingsRef().normalize_function_names) FunctionNameNormalizer().visit(ast.get()); - auto syntax_result = TreeRewriter(context).analyze(ast, source_columns); + auto syntax_result = TreeRewriter(context, no_throw).analyze(ast, source_columns); + if (!syntax_result) + return {}; /// AST potentially could be transformed to literal during TreeRewriter analyze. /// For example if we have SQL user defined function that return literal AS subquery. @@ -108,6 +110,18 @@ std::pair> evaluateConstantExpression(co return std::make_pair((*result_column)[0], result_type); } +std::optional tryEvaluateConstantExpression(const ASTPtr & node, const ContextPtr & context) +{ + return evaluateConstantExpressionImpl(node, context, true); +} + +EvaluateConstantExpressionResult evaluateConstantExpression(const ASTPtr & node, const ContextPtr & context) +{ + auto res = evaluateConstantExpressionImpl(node, context, false); + if (!res) + throw Exception(ErrorCodes::LOGICAL_ERROR, "evaluateConstantExpression expected to return a result or throw an exception"); + return *res; +} ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const ContextPtr & context) { diff --git a/src/Interpreters/evaluateConstantExpression.h b/src/Interpreters/evaluateConstantExpression.h index 91f3ac5dffd..7efb498c9ea 100644 --- a/src/Interpreters/evaluateConstantExpression.h +++ b/src/Interpreters/evaluateConstantExpression.h @@ -17,13 +17,16 @@ class IDataType; using ExpressionActionsPtr = std::shared_ptr; +using EvaluateConstantExpressionResult = std::pair>; + /** Evaluate constant expression and its type. * Used in rare cases - for elements of set for IN, for data to INSERT. * Throws exception if it's not a constant expression. * Quite suboptimal. */ -std::pair> evaluateConstantExpression(const ASTPtr & node, const ContextPtr & context); +EvaluateConstantExpressionResult evaluateConstantExpression(const ASTPtr & node, const ContextPtr & context); +std::optional tryEvaluateConstantExpression(const ASTPtr & node, const ContextPtr & context); /** Evaluate constant expression and returns ASTLiteral with its value. 
*/ diff --git a/src/Interpreters/examples/hash_map_string_small.cpp b/src/Interpreters/examples/hash_map_string_small.cpp index b58cdfbacd0..5f0312b3bdd 100644 --- a/src/Interpreters/examples/hash_map_string_small.cpp +++ b/src/Interpreters/examples/hash_map_string_small.cpp @@ -64,8 +64,8 @@ inline bool operator==(SmallStringRef lhs, SmallStringRef rhs) if (lhs.size == 0) return true; -#ifdef __SSE2__ - return memequalSSE2Wide(lhs.data(), rhs.data(), lhs.size); +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + return memequalWide(lhs.data(), rhs.data(), lhs.size); #else return 0 == memcmp(lhs.data(), rhs.data(), lhs.size); #endif diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 04c1d545207..750affdfe71 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -49,7 +49,7 @@ static ZooKeeperRetriesInfo getRetriesInfo() ); } -bool isSupportedAlterType(int type) +bool isSupportedAlterTypeForOnClusterDDLQuery(int type) { assert(type != ASTAlterCommand::NO_TYPE); static const std::unordered_set unsupported_alter_types{ @@ -90,7 +90,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, { for (const auto & command : query_alter->command_list->children) { - if (!isSupportedAlterType(command->as().type)) + if (!isSupportedAlterTypeForOnClusterDDLQuery(command->as().type)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query"); } } @@ -559,7 +559,7 @@ Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptrtryGetChildren(node_path, res); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw Coordination::Exception(code, node_path); + throw Coordination::Exception::fromPath(code, node_path); return res; } diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 40db13d7ef5..7daf9babf9f 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -21,7 +21,7 @@ class Cluster; using ClusterPtr = std::shared_ptr; /// Returns true if provided ALTER type can be executed ON CLUSTER -bool isSupportedAlterType(int type); +bool isSupportedAlterTypeForOnClusterDDLQuery(int type); struct DDLQueryOnClusterParams { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 688d3b9967d..1bfeeaa8ad4 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -209,7 +210,7 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er } static void -addStatusInfoToQueryElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) +addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) { const auto time_now = std::chrono::system_clock::now(); UInt64 elapsed_microseconds = info.elapsed_microseconds; @@ -240,6 +241,7 @@ addStatusInfoToQueryElement(QueryLogElement & element, const QueryStatusInfo & i element.memory_usage = info.peak_memory_usage > 0 ? 
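In hash_map_string_small.cpp the equality fast path is now taken when either SSE2 or AArch64 NEON is available, instead of being SSE2-only. The sketch below shows the same compile-time dispatch condition; since ClickHouse's `memequalWide` is internal, the vectorized branch here simply stands in with `memcmp`, and only the preprocessor condition mirrors the patch.

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>

// Compile-time dispatch: prefer the wide (vectorized) comparison when the
// target has SSE2 or AArch64 NEON, otherwise fall back to plain memcmp.
inline bool bytesEqual(const char * lhs, const char * rhs, std::size_t size)
{
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
    // Real code calls a hand-vectorized routine here (memequalWide in ClickHouse).
    return 0 == std::memcmp(lhs, rhs, size);
#else
    return 0 == std::memcmp(lhs, rhs, size);
#endif
}

int main()
{
    const char a[] = "clickhouse";
    const char b[] = "clickhouse";
    std::cout << bytesEqual(a, b, sizeof(a)) << '\n';   // 1
}
```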
info.peak_memory_usage : 0; element.thread_ids = info.thread_ids; + element.peak_threads_usage = info.peak_threads_usage; element.profile_counters = info.profile_counters; /// We need to refresh the access info since dependent views might have added extra information, either during @@ -347,6 +349,7 @@ void logQueryFinish( const QueryPipeline & query_pipeline, bool pulling_pipeline, std::shared_ptr query_span, + QueryCache::Usage query_cache_usage, bool internal) { const Settings & settings = context->getSettingsRef(); @@ -364,7 +367,7 @@ void logQueryFinish( QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); elem.type = QueryLogElementType::QUERY_FINISH; - addStatusInfoToQueryElement(elem, info, query_ast, context); + addStatusInfoToQueryLogElement(elem, info, query_ast, context); if (pulling_pipeline) { @@ -399,6 +402,8 @@ void logQueryFinish( ReadableSize(elem.read_bytes / elapsed_seconds)); } + elem.query_cache_usage = query_cache_usage; + if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) { @@ -499,13 +504,15 @@ void logQueryException( if (process_list_elem) { QueryStatusInfo info = process_list_elem->getInfo(true, settings.log_profile_events, false); - addStatusInfoToQueryElement(elem, info, query_ast, context); + addStatusInfoToQueryLogElement(elem, info, query_ast, context); } else { elem.query_duration_ms = start_watch.elapsedMilliseconds(); } + elem.query_cache_usage = QueryCache::Usage::None; + if (settings.calculate_text_stack_trace && log_error) setExceptionStackTrace(elem); logException(context, elem, log_error); @@ -723,11 +730,16 @@ static std::tuple executeQueryImpl( is_create_parameterized_view = create_query->isParameterizedView(); /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - if (!is_create_parameterized_view && context->hasQueryParameters()) + /// Even if we don't have parameters in query_context, check that AST doesn't have unknown parameters + bool probably_has_params = find_first_symbols<'{'>(begin, end) != end; + if (!is_create_parameterized_view && probably_has_params) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); - query = serializeAST(*ast); + if (visitor.getNumberOfReplacedParameters()) + query = serializeAST(*ast); + else + query.assign(begin, query_end); } else { @@ -975,7 +987,7 @@ static std::tuple executeQueryImpl( QueryCachePtr query_cache = context->getQueryCache(); const bool can_use_query_cache = query_cache != nullptr && settings.use_query_cache && !internal && (ast->as() || ast->as()); - bool write_into_query_cache = false; + QueryCache::Usage query_cache_usage = QueryCache::Usage::None; if (!async_insert) { @@ -992,6 +1004,7 @@ static std::tuple executeQueryImpl( QueryPipeline pipeline; pipeline.readFromQueryCache(reader.getSource(), reader.getSourceTotals(), reader.getSourceExtremes()); res.pipeline = std::move(pipeline); + query_cache_usage = QueryCache::Usage::Read; return true; } } @@ -1027,6 +1040,11 @@ static std::tuple executeQueryImpl( } + // InterpreterSelectQueryAnalyzer does not build QueryPlan in the constructor. + // We need to force to build it here to check if we need to ignore quota. 
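The prepared-statement change in executeQueryImpl adds a cheap pre-check: parameter substitution (and re-serialization of the AST) now only happens when the query text can possibly contain `{name:Type}` parameters. A minimal sketch of that fast path; the real code scans the raw buffer with `find_first_symbols<'{'>`, here `std::string_view::find` is used as a stand-in.

```cpp
#include <iostream>
#include <string>
#include <string_view>

// Cheap pre-check before the (comparatively expensive) parameter-substitution
// pass: if the query text contains no '{', it cannot contain {name:Type}
// parameters, so visiting the AST and re-serializing it can be skipped.
static bool probablyHasParams(std::string_view query)
{
    return query.find('{') != std::string_view::npos;
}

int main()
{
    std::cout << probablyHasParams("SELECT 1") << '\n';             // 0 -> keep the original query text
    std::cout << probablyHasParams("SELECT {id:UInt64}") << '\n';   // 1 -> run the replacement visitor
}
```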
+ if (auto * interpreter_with_analyzer = dynamic_cast(interpreter.get())) + interpreter_with_analyzer->getQueryPlan(); + if (!interpreter->ignoreQuota() && !quota_checked) { quota = context->getQuota(); @@ -1095,7 +1113,7 @@ static std::tuple executeQueryImpl( settings.query_cache_max_size_in_bytes, settings.query_cache_max_entries)); res.pipeline.writeResultIntoQueryCache(query_cache_writer); - write_into_query_cache = true; + query_cache_usage = QueryCache::Usage::Write; } } @@ -1147,19 +1165,19 @@ static std::tuple executeQueryImpl( auto finish_callback = [elem, context, ast, - write_into_query_cache, + query_cache_usage, internal, implicit_txn_control, execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { - if (write_into_query_cache) + if (query_cache_usage == QueryCache::Usage::Write) /// Trigger the actual write of the buffered query result into the query cache. This is done explicitly to prevent /// partial/garbage results in case of exceptions during query execution. query_pipeline.finalizeWriteInQueryCache(); - logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, internal); + logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, query_cache_usage, internal); if (*implicit_txn_control) execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 53624f8c812..c2c2e081b38 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -20,9 +20,9 @@ struct QueryStatusInfo; struct QueryResultDetails { String query_id; - std::optional content_type; - std::optional format; - std::optional timezone; + std::optional content_type = {}; + std::optional format = {}; + std::optional timezone = {}; }; using SetResultDetailsFunc = std::function; @@ -92,6 +92,7 @@ void logQueryFinish( const QueryPipeline & query_pipeline, bool pulling_pipeline, std::shared_ptr query_span, + QueryCache::Usage query_cache_usage, bool internal); void logQueryException( diff --git a/src/Interpreters/fuzzers/CMakeLists.txt b/src/Interpreters/fuzzers/CMakeLists.txt index 8e301470de2..4ac002d3d4a 100644 --- a/src/Interpreters/fuzzers/CMakeLists.txt +++ b/src/Interpreters/fuzzers/CMakeLists.txt @@ -5,5 +5,4 @@ target_link_libraries(execute_query_fuzzer PRIVATE clickhouse_table_functions clickhouse_aggregate_functions clickhouse_dictionaries - clickhouse_dictionaries_embedded - ${LIB_FUZZING_ENGINE}) + clickhouse_dictionaries_embedded) diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index 284e780ed1f..f12c01120cf 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -13,43 +13,44 @@ using namespace DB; extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - std::string input = std::string(reinterpret_cast(data), size); - - static SharedContextHolder shared_context; - static ContextMutablePtr context; - - auto initialize = [&]() mutable + try { - shared_context = Context::createShared(); - context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - context->setApplicationType(Context::ApplicationType::LOCAL); + std::string input = std::string(reinterpret_cast(data), size); - registerFunctions(); - registerAggregateFunctions(); - registerTableFunctions(); - registerStorages(); - registerDictionaries(); - 
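The execute_query_fuzzer rework moves the whole body into a try/catch and always returns 0, so inputs that merely raise exceptions are no longer reported as failures; libFuzzer expects the target callback to return 0. A self-contained sketch of that harness shape, with `processInput` as a hypothetical stand-in for the real query execution:

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>

// Stand-in for the real query execution; throws on "bad" input.
static void processInput(const std::string & input)
{
    if (input == "boom")
        throw std::runtime_error("parse error");
}

// libFuzzer target: do the work inside try/catch and always return 0,
// so that inputs which merely raise exceptions are not treated as crashes.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    try
    {
        processInput(std::string(reinterpret_cast<const char *>(data), size));
    }
    catch (...)
    {
        // Exceptions are expected for malformed inputs; ignore them.
    }
    return 0;
}
```

There is no `main` here on purpose: building with clang's `-fsanitize=fuzzer` links in the libFuzzer driver, which supplies it.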
registerDisks(/* global_skip_access_check= */ true); - registerFormats(); + static SharedContextHolder shared_context; + static ContextMutablePtr context; - return true; - }; + auto initialize = [&]() mutable + { + shared_context = Context::createShared(); + context = Context::createGlobal(shared_context.get()); + context->makeGlobalContext(); + context->setApplicationType(Context::ApplicationType::LOCAL); - static bool initialized = initialize(); - (void) initialized; + registerFunctions(); + registerAggregateFunctions(); + registerTableFunctions(); + registerStorages(); + registerDictionaries(); + registerDisks(/* global_skip_access_check= */ true); + registerFormats(); - auto io = DB::executeQuery(input, context, true, QueryProcessingStage::Complete); + return true; + }; - PullingPipelineExecutor executor(io.pipeline); - Block res; - while (!res && executor.pull(res)); + static bool initialized = initialize(); + (void) initialized; + + auto io = DB::executeQuery(input, context, true, QueryProcessingStage::Complete); + + PullingPipelineExecutor executor(io.pipeline); + Block res; + while (!res && executor.pull(res)); + } + catch (...) + { + } return 0; } -catch (...) -{ - return 1; -} diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 5bbd2667f55..4cac2f0e20c 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -306,7 +306,9 @@ void fillMissingColumns( return; size_t level = ISerialization::getArrayLevel(subpath); - assert(level < num_dimensions); + /// It can happen if element of Array is Map. + if (level >= num_dimensions) + return; auto stream_name = ISerialization::getFileNameForStream(*requested_column, subpath); auto it = offsets_columns.find(stream_name); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 83af2684322..aeb45c08bea 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -45,10 +45,10 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType DistributedSend; } static void executeCreateQuery( @@ -250,6 +250,9 @@ static void loadSystemDatabaseImpl(ContextMutablePtr context, const String & dat { String path = context->getPath() + "metadata/" + database_name; String metadata_file = path + ".sql"; + if (fs::exists(metadata_file + ".tmp")) + fs::remove(metadata_file + ".tmp"); + if (fs::exists(fs::path(metadata_file))) { /// 'has_force_restore_data_flag' is true, to not fail on loading query_log table, if it is corrupted. 
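loadMetadata.cpp now removes a leftover `<database>.sql.tmp` file before checking for the real metadata file; such a temporary can remain after an interrupted write. A minimal sketch of that cleanup step, with a generic path in place of the server's metadata directory:

```cpp
#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

// Drop a stale temporary left over from an interrupted metadata write,
// then report whether the real metadata file exists.
static bool prepareMetadataFile(const fs::path & metadata_file)
{
    fs::path tmp = metadata_file;
    tmp += ".tmp";                       // "<db>.sql" -> "<db>.sql.tmp"
    if (fs::exists(tmp))
        fs::remove(tmp);
    return fs::exists(metadata_file);
}

int main()
{
    std::cout << prepareMetadataFile("metadata/system.sql") << '\n';
}
```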
diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp new file mode 100644 index 00000000000..7dc452a0fcb --- /dev/null +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -0,0 +1,59 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +static bool isUserDefinedFunctionQuery(const ASTPtr & query) +{ + return query->as() + || query->as(); +} + +static bool isAccessControlQuery(const ASTPtr & query) +{ + return query->as() + || query->as() + || query->as() + || query->as() + || query->as() + || query->as(); +} + +ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) +{ + auto * query_on_cluster = dynamic_cast(query.get()); + + if (!query_on_cluster || query_on_cluster->cluster.empty()) + return query; + + if ((isUserDefinedFunctionQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_udf_queries + && context->getUserDefinedSQLObjectsLoader().isReplicated()) + || (isAccessControlQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) + { + LOG_DEBUG(&Poco::Logger::get("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); + return query_on_cluster->getRewrittenASTWithoutOnCluster(params); + } + + return query; +} +} diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.h b/src/Interpreters/removeOnClusterClauseIfNeeded.h new file mode 100644 index 00000000000..0cbc196c9f2 --- /dev/null +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query_ptr, ContextPtr context, const WithoutOnClusterASTRewriteParams & params = {}); + +} diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index dab14a66ed7..5efa3fa7023 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -55,7 +55,7 @@ void download(const std::string & cache_base_path, DB::FileSegment & file_segmen fs::create_directories(subdir); std::string data(size, '0'); - file_segment.write(data.data(), size, file_segment.getCurrentWriteOffset(false)); + file_segment.write(data.data(), size, file_segment.getCurrentWriteOffset()); } using Range = FileSegment::Range; @@ -69,13 +69,16 @@ fs::path caches_dir = fs::current_path() / "lru_cache_test"; std::string cache_base_path = caches_dir / "cache1" / ""; -void assertEqual(const HolderPtr & holder, const Ranges & expected_ranges, const States & expected_states = {}) +void assertEqual(FileSegments::const_iterator segments_begin, FileSegments::const_iterator segments_end, size_t segments_size, const Ranges & expected_ranges, const States & expected_states = {}) { - std::cerr << "Holder: " << holder->toString() << "\n"; - ASSERT_EQ(holder->size(), expected_ranges.size()); + std::cerr << "File segments: "; + for (auto it = segments_begin; it != segments_end; ++it) + std::cerr << (*it)->range().toString() << ", "; + + ASSERT_EQ(segments_size, expected_ranges.size()); if (!expected_states.empty()) - ASSERT_EQ(holder->size(), expected_states.size()); + ASSERT_EQ(segments_size, 
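The new removeOnClusterClauseIfNeeded helper drops an ON CLUSTER clause only when the affected objects are already replicated by the server itself and the matching "ignore" setting is enabled. The sketch below condenses that decision into plain booleans; the struct and field names are paraphrases of the conditions in the patch, not real ClickHouse types, and it needs C++20 for the designated initializers.

```cpp
#include <iostream>

struct QueryTraits
{
    bool is_udf_query = false;
    bool is_access_control_query = false;
    bool has_on_cluster = false;
};

struct ServerState
{
    bool ignore_on_cluster_for_replicated_udf = false;
    bool udf_storage_is_replicated = false;
    bool ignore_on_cluster_for_replicated_access = false;
    bool access_storage_is_replicated = false;
};

// Decision mirrored from removeOnClusterClauseIfNeeded: drop ON CLUSTER only
// for UDF or access-control queries whose objects are replicated and whose
// corresponding ignore_on_cluster_* setting is enabled.
static bool shouldDropOnCluster(const QueryTraits & q, const ServerState & s)
{
    if (!q.has_on_cluster)
        return false;
    return (q.is_udf_query && s.ignore_on_cluster_for_replicated_udf && s.udf_storage_is_replicated)
        || (q.is_access_control_query && s.ignore_on_cluster_for_replicated_access && s.access_storage_is_replicated);
}

int main()
{
    QueryTraits q{.is_udf_query = true, .has_on_cluster = true};
    ServerState s{.ignore_on_cluster_for_replicated_udf = true, .udf_storage_is_replicated = true};
    std::cout << shouldDropOnCluster(q, s) << '\n';   // 1: rewrite the AST without ON CLUSTER
}
```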
expected_states.size()); auto get_expected_state = [&](size_t i) { @@ -86,14 +89,25 @@ void assertEqual(const HolderPtr & holder, const Ranges & expected_ranges, const }; size_t i = 0; - for (const auto & file_segment : *holder) + for (auto it = segments_begin; it != segments_end; ++it) { + const auto & file_segment = *it; ASSERT_EQ(file_segment->range(), expected_ranges[i]); ASSERT_EQ(file_segment->state(), get_expected_state(i)); ++i; } } +void assertEqual(const FileSegments & file_segments, const Ranges & expected_ranges, const States & expected_states = {}) +{ + assertEqual(file_segments.begin(), file_segments.end(), file_segments.size(), expected_ranges, expected_states); +} + +void assertEqual(const FileSegmentsHolderPtr & file_segments, const Ranges & expected_ranges, const States & expected_states = {}) +{ + assertEqual(file_segments->begin(), file_segments->end(), file_segments->size(), expected_ranges, expected_states); +} + FileSegment & get(const HolderPtr & holder, int i) { auto it = std::next(holder->begin(), i); @@ -108,7 +122,7 @@ void download(FileSegment & file_segment) ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId()); ASSERT_EQ(file_segment.state(), State::DOWNLOADING); - ASSERT_EQ(file_segment.getDownloadedSize(false), 0); + ASSERT_EQ(file_segment.getDownloadedSize(), 0); ASSERT_TRUE(file_segment.reserve(file_segment.range().size())); download(cache_base_path, file_segment); @@ -121,7 +135,7 @@ void download(FileSegment & file_segment) void assertDownloadFails(FileSegment & file_segment) { ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId()); - ASSERT_EQ(file_segment.getDownloadedSize(false), 0); + ASSERT_EQ(file_segment.getDownloadedSize(), 0); ASSERT_FALSE(file_segment.reserve(file_segment.range().size())); file_segment.complete(); } @@ -209,7 +223,7 @@ TEST_F(FileCacheTest, get) { std::cerr << "Step 1\n"; - auto cache = DB::FileCache(settings); + auto cache = DB::FileCache("1", settings); cache.initialize(); auto key = cache.createKeyForPath("key1"); @@ -479,7 +493,7 @@ TEST_F(FileCacheTest, get) cv.notify_one(); file_segment2.wait(file_segment2.range().right); - ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size()); + ASSERT_EQ(file_segment2.getDownloadedSize(), file_segment2.range().size()); }); { @@ -568,7 +582,7 @@ TEST_F(FileCacheTest, get) { /// Test LRUCache::restore(). 
- auto cache2 = DB::FileCache(settings); + auto cache2 = DB::FileCache("2", settings); cache2.initialize(); auto key = cache2.createKeyForPath("key1"); @@ -587,7 +601,7 @@ TEST_F(FileCacheTest, get) settings2.max_file_segment_size = 10; settings2.base_path = caches_dir / "cache2"; fs::create_directories(settings2.base_path); - auto cache2 = DB::FileCache(settings2); + auto cache2 = DB::FileCache("3", settings2); cache2.initialize(); auto key = cache2.createKeyForPath("key1"); @@ -600,11 +614,10 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 13\n"; { - /// Test delated cleanup + /// Test delayed cleanup - auto cache = FileCache(settings); + auto cache = FileCache("4", settings); cache.initialize(); - cache.cleanup(); const auto key = cache.createKeyForPath("key10"); const auto key_path = cache.getPathInLocalCache(key); @@ -619,21 +632,15 @@ TEST_F(FileCacheTest, get) cache.removeAllReleasable(); ASSERT_EQ(cache.getUsedCacheSize(), 0); - ASSERT_TRUE(fs::exists(key_path)); - ASSERT_TRUE(!fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular))); - - cache.cleanup(); ASSERT_TRUE(!fs::exists(key_path)); - ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path())); + ASSERT_TRUE(!fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular))); } std::cerr << "Step 14\n"; { /// Test background thread delated cleanup - auto settings2{settings}; - settings2.delayed_cleanup_interval_ms = 0; - auto cache = DB::FileCache(settings2); + auto cache = DB::FileCache("5", settings); cache.initialize(); const auto key = cache.createKeyForPath("key10"); const auto key_path = cache.getPathInLocalCache(key); @@ -662,7 +669,7 @@ TEST_F(FileCacheTest, writeBuffer) settings.max_file_segment_size = 5; settings.base_path = cache_base_path; - FileCache cache(settings); + FileCache cache("6", settings); cache.initialize(); auto write_to_cache = [&cache](const String & key, const Strings & data, bool flush) @@ -767,7 +774,7 @@ TEST_F(FileCacheTest, temporaryData) settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; - DB::FileCache file_cache(settings); + DB::FileCache file_cache("7", settings); file_cache.initialize(); auto tmp_data_scope = std::make_shared(nullptr, &file_cache, 0); @@ -908,7 +915,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) wb->next(); wb->finalize(); - auto cache = std::make_shared(settings); + auto cache = std::make_shared("8", settings); cache->initialize(); auto key = cache->createKeyForPath(file_path); diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 271ab39cd88..258853e8162 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -21,6 +21,12 @@ namespace fs = std::filesystem; namespace DB { class SensitiveDataMasker; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + } @@ -40,14 +46,10 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p std::tm buf; localtime_r(&now, &buf); std::ostringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - ss << std::put_time(&buf, file_path.c_str()); + ss << std::put_time(&buf, path.filename().c_str()); return path.replace_filename(ss.str()); } -#ifndef WITHOUT_TEXT_LOG -constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500; -#endif - void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { auto current_logger = config.getString("logger", ""); @@ -271,9 +273,37 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, 
Poco::Log { String text_log_level_str = config.getString("text_log.level", "trace"); int text_log_level = Poco::Logger::parseLevel(text_log_level_str); - size_t flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds", - DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); - split->addTextLog(DB::TextLog::getLogQueue(flush_interval_milliseconds), text_log_level); + + DB::SystemLogQueueSettings log_settings; + log_settings.flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds", + DB::TextLog::getDefaultFlushIntervalMilliseconds()); + + log_settings.max_size_rows = config.getUInt64("text_log.max_size_rows", + DB::TextLog::getDefaultMaxSize()); + + if (log_settings.max_size_rows< 1) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "text_log.max_size_rows {} should be 1 at least", + log_settings.max_size_rows); + + log_settings.reserved_size_rows = config.getUInt64("text_log.reserved_size_rows", DB::TextLog::getDefaultReservedSize()); + + if (log_settings.max_size_rows < log_settings.reserved_size_rows) + { + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "text_log.max_size {0} should be greater or equal to text_log.reserved_size_rows {1}", + log_settings.max_size_rows, + log_settings.reserved_size_rows); + } + + log_settings.buffer_size_rows_flush_threshold = config.getUInt64("text_log.buffer_size_rows_flush_threshold", + log_settings.max_size_rows / 2); + + log_settings.notify_flush_on_crash = config.getBool("text_log.flush_on_crash", + DB::TextLog::shouldNotifyFlushOnCrash()); + + log_settings.turn_off_logger = DB::TextLog::shouldTurnOffLogger(); + + split->addTextLog(DB::TextLog::getLogQueue(log_settings), text_log_level); } #endif } diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index b5ac42d6041..958be928509 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -1,7 +1,6 @@ #include "OwnSplitChannel.h" #include "OwnFormattingChannel.h" -#include #include #include #include @@ -118,7 +117,6 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) elem.event_time = msg_ext.time_seconds; elem.event_time_microseconds = msg_ext.time_in_microseconds; - elem.microseconds = msg_ext.time_microseconds; elem.thread_name = getThreadName(); elem.thread_id = msg_ext.thread_id; @@ -138,7 +136,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) std::shared_ptr> text_log_locked{}; text_log_locked = text_log.lock(); if (text_log_locked) - text_log_locked->push(elem); + text_log_locked->push(std::move(elem)); } #endif } diff --git a/src/Parsers/ASTAlterNamedCollectionQuery.cpp b/src/Parsers/ASTAlterNamedCollectionQuery.cpp index 7e95147ad75..6363a7306bd 100644 --- a/src/Parsers/ASTAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ASTAlterNamedCollectionQuery.cpp @@ -15,6 +15,8 @@ ASTPtr ASTAlterNamedCollectionQuery::clone() const void ASTAlterNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "Alter NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? 
hilite_none : ""); formatOnCluster(settings); if (!changes.empty()) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 61e5903fad5..955320c318c 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes String ASTAlterCommand::getID(char delim) const { - return String("AlterCommand") + delim + typeToString(type); + return fmt::format("AlterCommand{}{}", delim, type); } ASTPtr ASTAlterCommand::clone() const @@ -80,53 +80,6 @@ ASTPtr ASTAlterCommand::clone() const return res; } -const char * ASTAlterCommand::typeToString(ASTAlterCommand::Type type) -{ - switch (type) - { - case ADD_COLUMN: return "ADD_COLUMN"; - case DROP_COLUMN: return "DROP_COLUMN"; - case MODIFY_COLUMN: return "MODIFY_COLUMN"; - case COMMENT_COLUMN: return "COMMENT_COLUMN"; - case RENAME_COLUMN: return "RENAME_COLUMN"; - case MATERIALIZE_COLUMN: return "MATERIALIZE_COLUMN"; - case MODIFY_ORDER_BY: return "MODIFY_ORDER_BY"; - case MODIFY_SAMPLE_BY: return "MODIFY_SAMPLE_BY"; - case MODIFY_TTL: return "MODIFY_TTL"; - case MATERIALIZE_TTL: return "MATERIALIZE_TTL"; - case MODIFY_SETTING: return "MODIFY_SETTING"; - case RESET_SETTING: return "RESET_SETTING"; - case MODIFY_QUERY: return "MODIFY_QUERY"; - case REMOVE_TTL: return "REMOVE_TTL"; - case REMOVE_SAMPLE_BY: return "REMOVE_SAMPLE_BY"; - case ADD_INDEX: return "ADD_INDEX"; - case DROP_INDEX: return "DROP_INDEX"; - case MATERIALIZE_INDEX: return "MATERIALIZE_INDEX"; - case ADD_CONSTRAINT: return "ADD_CONSTRAINT"; - case DROP_CONSTRAINT: return "DROP_CONSTRAINT"; - case ADD_PROJECTION: return "ADD_PROJECTION"; - case DROP_PROJECTION: return "DROP_PROJECTION"; - case MATERIALIZE_PROJECTION: return "MATERIALIZE_PROJECTION"; - case DROP_PARTITION: return "DROP_PARTITION"; - case DROP_DETACHED_PARTITION: return "DROP_DETACHED_PARTITION"; - case ATTACH_PARTITION: return "ATTACH_PARTITION"; - case MOVE_PARTITION: return "MOVE_PARTITION"; - case REPLACE_PARTITION: return "REPLACE_PARTITION"; - case FETCH_PARTITION: return "FETCH_PARTITION"; - case FREEZE_PARTITION: return "FREEZE_PARTITION"; - case FREEZE_ALL: return "FREEZE_ALL"; - case UNFREEZE_PARTITION: return "UNFREEZE_PARTITION"; - case UNFREEZE_ALL: return "UNFREEZE_ALL"; - case DELETE: return "DELETE"; - case UPDATE: return "UPDATE"; - case NO_TYPE: return "NO_TYPE"; - case LIVE_VIEW_REFRESH: return "LIVE_VIEW_REFRESH"; - case MODIFY_DATABASE_SETTING: return "MODIFY_DATABASE_SETTING"; - case MODIFY_COMMENT: return "MODIFY_COMMENT"; - } - UNREACHABLE(); -} - void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { if (type == ASTAlterCommand::ADD_COLUMN) diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 1400113fa9c..30cf0cac4ce 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -208,8 +208,6 @@ public: ASTPtr clone() const override; - static const char * typeToString(Type type); - protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 12d000d5e9f..e9b490a1be3 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -77,10 +77,6 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? 
hilite_none : ""); } - if (primary_key_specifier) - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") - << "PRIMARY KEY" << (settings.hilite ? hilite_none : ""); - if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 0d580d5bb21..17d4b9a9d58 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -38,7 +38,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str; - settings.ostr << "CREATE INDEX " << (if_not_exists ? "IF NOT EXISTS " : ""); + settings.ostr << "CREATE " << (unique ? "UNIQUE " : "") << "INDEX " << (if_not_exists ? "IF NOT EXISTS " : ""); index_name->formatImpl(settings, state, frame); settings.ostr << " ON "; diff --git a/src/Parsers/ASTCreateIndexQuery.h b/src/Parsers/ASTCreateIndexQuery.h index 424a0e493d9..b7577f2634e 100644 --- a/src/Parsers/ASTCreateIndexQuery.h +++ b/src/Parsers/ASTCreateIndexQuery.h @@ -20,6 +20,7 @@ public: ASTPtr index_decl; bool if_not_exists{false}; + bool unique{false}; String getID(char delim) const override; diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.cpp b/src/Parsers/ASTCreateNamedCollectionQuery.cpp index 97e83541f05..45ef8565148 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.cpp +++ b/src/Parsers/ASTCreateNamedCollectionQuery.cpp @@ -18,6 +18,8 @@ ASTPtr ASTCreateNamedCollectionQuery::clone() const void ASTCreateNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE NAMED COLLECTION "; + if (if_not_exists) + settings.ostr << "IF NOT EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? 
hilite_none : ""); formatOnCluster(settings); diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.h b/src/Parsers/ASTCreateNamedCollectionQuery.h index 303d1901931..785aeb781b4 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.h +++ b/src/Parsers/ASTCreateNamedCollectionQuery.h @@ -13,6 +13,7 @@ class ASTCreateNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster public: std::string collection_name; SettingsChanges changes; + bool if_not_exists = false; String getID(char) const override { return "CreateNamedCollectionQuery"; } diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 196681a8801..1562586bd93 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace DB @@ -460,4 +462,49 @@ bool ASTCreateQuery::isParameterizedView() const return false; } + +ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query) + : uuid(query.uuid) + , to_inner_uuid(query.to_inner_uuid) +{ +} + +String ASTCreateQuery::UUIDs::toString() const +{ + WriteBufferFromOwnString out; + out << "{" << uuid << "," << to_inner_uuid << "}"; + return out.str(); +} + +ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str) +{ + ReadBufferFromString in{str}; + ASTCreateQuery::UUIDs res; + in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}"; + return res; +} + +ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid) +{ + if (always_generate_new_uuid) + setUUID({}); + + if (uuid == UUIDHelpers::Nil) + uuid = UUIDHelpers::generateV4(); + + /// If destination table (to_table_id) is not specified for materialized view, + /// then MV will create inner table. We should generate UUID of inner table here. + bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id; + if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil)) + to_inner_uuid = UUIDHelpers::generateV4(); + + return UUIDs{*this}; +} + +void ASTCreateQuery::setUUID(const UUIDs & uuids) +{ + uuid = uuids.uuid; + to_inner_uuid = uuids.to_inner_uuid; +} + } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index ae45a244a03..28f5e05802b 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -146,6 +146,18 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } + struct UUIDs + { + UUID uuid = UUIDHelpers::Nil; + UUID to_inner_uuid = UUIDHelpers::Nil; + UUIDs() = default; + explicit UUIDs(const ASTCreateQuery & query); + String toString() const; + static UUIDs fromString(const String & str); + }; + UUIDs generateRandomUUID(bool always_generate_new_uuid = false); + void setUUID(const UUIDs & uuids); + protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTDropNamedCollectionQuery.cpp b/src/Parsers/ASTDropNamedCollectionQuery.cpp index 3b8568cfd70..e317681d33d 100644 --- a/src/Parsers/ASTDropNamedCollectionQuery.cpp +++ b/src/Parsers/ASTDropNamedCollectionQuery.cpp @@ -13,6 +13,8 @@ ASTPtr ASTDropNamedCollectionQuery::clone() const void ASTDropNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? 
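ASTCreateQuery gains a small `UUIDs` holder serialized as `{uuid,to_inner_uuid}` via toString()/fromString(). The sketch below reproduces only that textual round trip using plain strings; the real code stores `UUID` values and streams them through ClickHouse read/write buffers, so the parsing here is a simplified stand-in.

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Two UUIDs serialized as "{uuid,to_inner_uuid}", matching the textual
// format used by ASTCreateQuery::UUIDs in the patch.
struct UUIDs
{
    std::string uuid;
    std::string to_inner_uuid;

    std::string toString() const { return "{" + uuid + "," + to_inner_uuid + "}"; }

    static UUIDs fromString(const std::string & str)
    {
        if (str.size() < 3 || str.front() != '{' || str.back() != '}')
            throw std::invalid_argument("bad UUIDs string: " + str);
        auto comma = str.find(',');
        if (comma == std::string::npos)
            throw std::invalid_argument("bad UUIDs string: " + str);
        return {str.substr(1, comma - 1), str.substr(comma + 1, str.size() - comma - 2)};
    }
};

int main()
{
    UUIDs ids{"7f2a0c3e-1a2b-4c5d-8e9f-0123456789ab", "00000000-0000-0000-0000-000000000000"};
    std::cout << (UUIDs::fromString(ids.toString()).uuid == ids.uuid) << '\n';   // 1: round trip preserved
}
```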
hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); } diff --git a/src/Parsers/ASTForeignKeyDeclaration.h b/src/Parsers/ASTForeignKeyDeclaration.h new file mode 100644 index 00000000000..43c5995055d --- /dev/null +++ b/src/Parsers/ASTForeignKeyDeclaration.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +namespace DB +{ + +/* + * Currently ignore the foreign key node, flesh it out when needed + */ +class ASTForeignKeyDeclaration : public IAST +{ +public: + String name; + + String getID(char) const override { return "Foreign Key"; } + + ASTPtr clone() const override + { + auto res = std::make_shared(); + res->name = name; + return res; + } +}; + +} diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 6ed241f75ab..1fbf5e12695 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -14,6 +14,7 @@ class ASTIndexDeclaration : public IAST public: static const auto DEFAULT_INDEX_GRANULARITY = 1uz; static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz; + static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz; String name; IAST * expr; diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 6481fc6eb00..425e5c73bee 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -102,7 +102,7 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const { - /// 100 - just arbitrary value. + /// 100 - just arbitrary value. constexpr auto min_elements_for_hashing = 100; /// Special case for very large arrays. Instead of listing all elements, will use hash of them. @@ -127,9 +127,31 @@ void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const } } +/// Use different rules for escaping backslashes and quotes +class FieldVisitorToStringPostgreSQL : public StaticVisitor +{ +public: + template + String operator() (const T & x) const { return visitor(x); } + +private: + FieldVisitorToString visitor; +}; + +template<> +String FieldVisitorToStringPostgreSQL::operator() (const String & x) const +{ + WriteBufferFromOwnString wb; + writeQuotedStringPostgreSQL(x, wb); + return wb.str(); +} + void ASTLiteral::formatImplWithoutAlias(const FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { - settings.ostr << applyVisitor(FieldVisitorToString(), value); + if (settings.literal_escaping_style == LiteralEscapingStyle::Regular) + settings.ostr << applyVisitor(FieldVisitorToString(), value); + else + settings.ostr << applyVisitor(FieldVisitorToStringPostgreSQL(), value); } } diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 0cfdc3762a1..90d9ede7337 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -73,11 +73,11 @@ void ASTProjectionSelectQuery::formatImpl(const FormatSettings & s, FormatState if (orderBy()) { - /// Let's convert the ASTFunction into ASTExpressionList, which generates consistent format + /// Let's convert tuple ASTFunction into ASTExpressionList, which generates consistent format /// between GROUP BY and ORDER BY projection definition. s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? 
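ASTLiteral now has a second formatting mode, selected by `literal_escaping_style`, whose string visitor uses `writeQuotedStringPostgreSQL` instead of the regular quoting. The exact escaping rules are not visible in this hunk; the sketch below shows the classic difference one would expect, assuming standard-conforming PostgreSQL strings (single quotes doubled, backslash left literal) versus backslash escaping, and both functions are illustrative rather than the ClickHouse implementations.

```cpp
#include <iostream>
#include <string>

// Regular (backslash) escaping: escape both ' and '\' with a backslash.
static std::string quoteRegular(const std::string & s)
{
    std::string out = "'";
    for (char c : s)
    {
        if (c == '\'' || c == '\\')
            out += '\\';
        out += c;
    }
    return out + "'";
}

// PostgreSQL standard strings: a single quote is doubled, a backslash is literal.
static std::string quotePostgreSQL(const std::string & s)
{
    std::string out = "'";
    for (char c : s)
    {
        if (c == '\'')
            out += '\'';
        out += c;
    }
    return out + "'";
}

int main()
{
    std::string v = "it's a C:\\path";
    std::cout << quoteRegular(v) << '\n';     // 'it\'s a C:\\path'
    std::cout << quotePostgreSQL(v) << '\n';  // 'it''s a C:\path'
}
```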
hilite_none : ""); ASTPtr order_by; - if (auto * func = orderBy()->as()) + if (auto * func = orderBy()->as(); func && func->name == "tuple") order_by = func->arguments; else { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 9550752b1f3..48b4ae3c38d 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -5,7 +5,6 @@ #include #include -#include namespace DB { diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index 76ad812e713..1b7b76fe231 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -69,9 +69,9 @@ void ASTSetQuery::appendColumnName(WriteBuffer & ostr) const Hash hash = getTreeHash(); writeCString("__settings_", ostr); - writeText(hash.first, ostr); + writeText(hash.low64, ostr); ostr.write('_'); - writeText(hash.second, ostr); + writeText(hash.high64, ostr); } } diff --git a/src/Parsers/ASTShowFunctionsQuery.cpp b/src/Parsers/ASTShowFunctionsQuery.cpp new file mode 100644 index 00000000000..9253dcf5cb2 --- /dev/null +++ b/src/Parsers/ASTShowFunctionsQuery.cpp @@ -0,0 +1,25 @@ +#include + +#include + +namespace DB +{ + +ASTPtr ASTShowFunctionsQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; +} + +void ASTShowFunctionsQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW FUNCTIONS" << (settings.hilite ? hilite_none : ""); + + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << (case_insensitive_like ? " ILIKE " : " LIKE ") + << (settings.hilite ? hilite_none : "") << DB::quote << like; +} + +} diff --git a/src/Parsers/ASTShowFunctionsQuery.h b/src/Parsers/ASTShowFunctionsQuery.h new file mode 100644 index 00000000000..6993f939888 --- /dev/null +++ b/src/Parsers/ASTShowFunctionsQuery.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTShowFunctionsQuery : public ASTQueryWithOutput +{ +public: + bool case_insensitive_like = false; + String like; + + String getID(char) const override { return "ShowFunctions"; } + ASTPtr clone() const override; + QueryKind getQueryKind() const override { return QueryKind::Show; } + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 7b4e052bc86..03ae96fa288 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,10 +11,20 @@ ASTPtr ASTShowTablesQuery::clone() const { auto res = std::make_shared(*this); res->children.clear(); + if (from) + res->set(res->from, from->clone()); + cloneOutputOptions(*res); return res; } +String ASTShowTablesQuery::getFrom() const +{ + String name; + tryGetIdentifierNameInto(from, name); + return name; +} + void ASTShowTablesQuery::formatLike(const FormatSettings & settings) const { if (!like.empty()) @@ -72,9 +83,11 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : ""); - if (!from.empty()) - settings.ostr << (settings.hilite ? 
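ASTSetQuery::appendColumnName now writes the 128-bit tree hash as its `low64` and `high64` halves instead of the old `first`/`second` pair. A tiny sketch of the resulting name shape, using `std::to_string` in place of ClickHouse's `writeText`:

```cpp
#include <cstdint>
#include <iostream>
#include <string>

struct Hash128 { uint64_t low64; uint64_t high64; };

// Column-name generation as in ASTSetQuery::appendColumnName:
// "__settings_<low64>_<high64>".
static std::string settingsColumnName(Hash128 h)
{
    return "__settings_" + std::to_string(h.low64) + "_" + std::to_string(h.high64);
}

int main()
{
    std::cout << settingsColumnName({123456789, 987654321}) << '\n';   // __settings_123456789_987654321
}
```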
hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); + if (from) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); + from->formatImpl(settings, state, frame); + } formatLike(settings); diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index 2878df54bcc..2fd4c3dac96 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -24,8 +24,9 @@ public: bool caches = false; bool full = false; + IAST * from; + String cluster_str; - String from; String like; bool not_like = false; @@ -38,6 +39,8 @@ public: ASTPtr clone() const override; QueryKind getQueryKind() const override { return QueryKind::Show; } + String getFrom() const; + protected: void formatLike(const FormatSettings & settings) const; void formatLimit(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const; diff --git a/src/Parsers/ASTSubquery.cpp b/src/Parsers/ASTSubquery.cpp index a3408f12330..92adad666ed 100644 --- a/src/Parsers/ASTSubquery.cpp +++ b/src/Parsers/ASTSubquery.cpp @@ -19,11 +19,9 @@ void ASTSubquery::appendColumnNameImpl(WriteBuffer & ostr) const } else { - Hash hash = getTreeHash(); + const auto hash = getTreeHash(); writeCString("__subquery_", ostr); - writeText(hash.first, ostr); - ostr.write('_'); - writeText(hash.second, ostr); + writeString(toString(hash), ostr); } } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a91449ff035..9be01719d8c 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -162,7 +162,9 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_REPLICATION_QUEUES || type == Type::START_REPLICATION_QUEUES || type == Type::STOP_DISTRIBUTED_SENDS - || type == Type::START_DISTRIBUTED_SENDS) + || type == Type::START_DISTRIBUTED_SENDS + || type == Type::STOP_PULLING_REPLICATION_LOG + || type == Type::START_PULLING_REPLICATION_LOG) { if (table) print_database_table(); @@ -202,7 +204,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, } else if (type == Type::SUSPEND) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " << (settings.hilite ? hilite_none : "") << seconds << (settings.hilite ? hilite_keyword : "") << " SECOND" << (settings.hilite ? hilite_none : ""); @@ -210,7 +212,15 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) + { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; + if (!key_to_drop.empty()) + { + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; + if (offset_to_drop.has_value()) + settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << offset_to_drop.value(); + } + } } else if (type == Type::UNFREEZE) { @@ -220,6 +230,55 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { settings.ostr << (settings.hilite ? hilite_none : ""); } + else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " + << ServerType::serverTypeToString(server_type.type) << (settings.hilite ? 
hilite_none : ""); + + if (server_type.type == ServerType::Type::CUSTOM) + { + settings.ostr << " " << quoteString(server_type.custom_name); + } + + bool comma = false; + + if (!server_type.exclude_types.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " EXCEPT" << (settings.hilite ? hilite_none : ""); + + for (auto cur_type : server_type.exclude_types) + { + if (cur_type == ServerType::Type::CUSTOM) + continue; + + if (comma) + settings.ostr << ","; + else + comma = true; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " + << ServerType::serverTypeToString(cur_type) << (settings.hilite ? hilite_none : ""); + } + + if (server_type.exclude_types.contains(ServerType::Type::CUSTOM)) + { + for (const auto & cur_name : server_type.exclude_custom_names) + { + if (comma) + settings.ostr << ","; + else + comma = true; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " + << ServerType::serverTypeToString(ServerType::Type::CUSTOM) << (settings.hilite ? hilite_none : ""); + + settings.ostr << " " << quoteString(cur_name); + } + } + } + + } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 52b3b79b16e..5897c63f66a 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "config.h" @@ -35,8 +36,8 @@ public: #if USE_AWS_S3 DROP_S3_CLIENT_CACHE, #endif - STOP_LISTEN_QUERIES, - START_LISTEN_QUERIES, + STOP_LISTEN, + START_LISTEN, RESTART_REPLICAS, RESTART_REPLICA, RESTORE_REPLICA, @@ -71,6 +72,7 @@ public: START_REPLICATION_QUEUES, FLUSH_LOGS, FLUSH_DISTRIBUTED, + FLUSH_ASYNC_INSERT_QUEUE, STOP_DISTRIBUTED_SENDS, START_DISTRIBUTED_SENDS, START_THREAD_FUZZER, @@ -78,6 +80,9 @@ public: UNFREEZE, ENABLE_FAILPOINT, DISABLE_FAILPOINT, + SYNC_FILESYSTEM_CACHE, + STOP_PULLING_REPLICATION_LOG, + START_PULLING_REPLICATION_LOG, END }; @@ -106,6 +111,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; + std::string key_to_drop; + std::optional offset_to_drop; String backup_name; @@ -115,6 +122,8 @@ public: SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT; + ServerType server_type; + String getID(char) const override { return "SYSTEM query"; } ASTPtr clone() const override diff --git a/src/Parsers/ASTUseQuery.h b/src/Parsers/ASTUseQuery.h index f767a6bbdb7..873a316e653 100644 --- a/src/Parsers/ASTUseQuery.h +++ b/src/Parsers/ASTUseQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -14,19 +15,34 @@ namespace DB class ASTUseQuery : public IAST { public: - String database; + IAST * database; + + String getDatabase() const + { + String name; + tryGetIdentifierNameInto(database, name); + return name; + } /** Get the text that identifies this element. */ - String getID(char delim) const override { return "UseQuery" + (delim + database); } + String getID(char delim) const override { return "UseQuery" + (delim + getDatabase()); } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + if (database) + res->set(res->database, database->clone()); + return res; + } QueryKind getQueryKind() const override { return QueryKind::Use; } protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : ""); + database->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.cpp b/src/Parsers/Access/ASTCreateQuotaQuery.cpp index 56abedf5235..091b62b0a9f 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ASTCreateQuotaQuery.cpp @@ -170,6 +170,12 @@ void ASTCreateQuotaQuery::formatImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " OR REPLACE" << (settings.hilite ? hilite_none : ""); formatNames(names, settings); + + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " IN " << (settings.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.h b/src/Parsers/Access/ASTCreateQuotaQuery.h index f5eb59800ec..aecbbb00f9a 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.h +++ b/src/Parsers/Access/ASTCreateQuotaQuery.h @@ -38,6 +38,7 @@ public: Strings names; String new_name; std::optional key_type; + String storage_name; struct Limits { diff --git a/src/Parsers/Access/ASTCreateRoleQuery.cpp b/src/Parsers/Access/ASTCreateRoleQuery.cpp index d624b9a9157..eeeb34c97e4 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.cpp +++ b/src/Parsers/Access/ASTCreateRoleQuery.cpp @@ -71,6 +71,12 @@ void ASTCreateRoleQuery::formatImpl(const FormatSettings & format, FormatState & format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); formatNames(names, format); + + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateRoleQuery.h b/src/Parsers/Access/ASTCreateRoleQuery.h index 42d1a4031b6..4e465553164 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.h +++ b/src/Parsers/Access/ASTCreateRoleQuery.h @@ -28,6 +28,7 @@ public: Strings names; String new_name; + String storage_name; std::shared_ptr settings; diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp index ca888be2cfe..e95959703ee 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp @@ -168,6 +168,11 @@ void ASTCreateRowPolicyQuery::formatImpl(const FormatSettings & settings, Format settings.ostr << " "; names->format(settings); + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " IN " << (settings.hilite ? 
IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); assert(names->cluster.empty()); diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.h b/src/Parsers/Access/ASTCreateRowPolicyQuery.h index 32d98fab822..3f2418e7287 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.h +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.h @@ -35,6 +35,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + String storage_name; std::shared_ptr names; String new_short_name; diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp index 56ddef433ef..3379486d789 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp @@ -81,6 +81,12 @@ void ASTCreateSettingsProfileQuery::formatImpl(const FormatSettings & format, Fo format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); formatNames(names, format); + + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.h b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h index c1a64998f29..be01aae1e26 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h @@ -28,6 +28,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + String storage_name; Strings names; String new_name; diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0611545adf0..b7290c16013 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -208,6 +208,11 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & format.ostr << " "; names->format(format); + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (new_name) diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 62ddbfd0040..06940e67d91 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -45,6 +45,7 @@ public: std::shared_ptr names; std::optional new_name; + String storage_name; std::shared_ptr auth_data; diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp index 88f2d7bce63..bcd7105d0e9 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp @@ -53,6 +53,11 @@ void ASTDropAccessEntityQuery::formatImpl(const FormatSettings & settings, Forma else formatNames(names, settings); + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " FROM " << (settings.hilite ? 
hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); } diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.h b/src/Parsers/Access/ASTDropAccessEntityQuery.h index f3a065c50df..32f4a8f8047 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.h +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.h @@ -21,6 +21,7 @@ public: AccessEntityType type; bool if_exists = false; Strings names; + String storage_name; std::shared_ptr row_policy_names; String getID(char) const override; diff --git a/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp b/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..285f07854c0 --- /dev/null +++ b/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ','; + settings.ostr << ' ' << backQuoteIfNeed(name); + } + } +} + +String ASTMoveAccessEntityQuery::getID(char) const +{ + return String("MOVE ") + toString(type) + " query"; +} + +ASTPtr ASTMoveAccessEntityQuery::clone() const +{ + auto res = std::make_shared(*this); + + if (row_policy_names) + res->row_policy_names = std::static_pointer_cast(row_policy_names->clone()); + + return res; +} + +void ASTMoveAccessEntityQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "MOVE " << AccessEntityTypeInfo::get(type).name + << (settings.hilite ? hilite_none : ""); + + if (type == AccessEntityType::ROW_POLICY) + { + settings.ostr << " "; + row_policy_names->format(settings); + } + else + formatNames(names, settings); + + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " TO " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(storage_name); + + formatOnCluster(settings); +} + +void ASTMoveAccessEntityQuery::replaceEmptyDatabase(const String & current_database) const +{ + if (row_policy_names) + row_policy_names->replaceEmptyDatabase(current_database); +} +} diff --git a/src/Parsers/Access/ASTMoveAccessEntityQuery.h b/src/Parsers/Access/ASTMoveAccessEntityQuery.h new file mode 100644 index 00000000000..aa2b3b0f98c --- /dev/null +++ b/src/Parsers/Access/ASTMoveAccessEntityQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTRowPolicyNames; + +/** MOVE {USER | ROLE | QUOTA | [ROW] POLICY | [SETTINGS] PROFILE} [IF EXISTS] name [,...] 
[ON [database.]table [,...]] TO storage_name + */ +class ASTMoveAccessEntityQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + AccessEntityType type; + Strings names; + std::shared_ptr row_policy_names; + + String storage_name; + + String getID(char) const override; + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster(clone()); } + + void replaceEmptyDatabase(const String & current_database) const; + + QueryKind getQueryKind() const override { return QueryKind::Move; } +}; +} diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index 2330ba7771c..bc3512e3fc0 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -108,7 +109,8 @@ namespace }); } - template || std::is_same_v>> + template + requires std::same_as || std::same_as T fieldToNumber(const Field & f) { if (f.getType() == Field::Types::String) @@ -288,6 +290,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe std::optional key_type; std::vector all_limits; String cluster; + String storage_name; while (true) { @@ -310,6 +313,9 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -332,6 +338,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->key_type = key_type; query->all_limits = std::move(all_limits); query->roles = std::move(roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateRoleQuery.cpp b/src/Parsers/Access/ParserCreateRoleQuery.cpp index da9749958ee..99a97f6901d 100644 --- a/src/Parsers/Access/ParserCreateRoleQuery.cpp +++ b/src/Parsers/Access/ParserCreateRoleQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -91,6 +92,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String new_name; std::shared_ptr settings; String cluster; + String storage_name; while (true) { @@ -110,6 +112,9 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -125,6 +130,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->names = std::move(names); query->new_name = std::move(new_name); query->settings = std::move(settings); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp index 2c25fc14e7d..f9e9466e35d 100644 --- a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -245,6 +246,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & String new_short_name; std::optional 
is_restrictive; std::vector> filters; + String storage_name; while (true) { @@ -271,6 +273,9 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -294,6 +299,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->is_restrictive = is_restrictive; query->filters = std::move(filters); query->roles = std::move(roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp index c58a3035dc6..fcb6ebd8806 100644 --- a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -111,6 +112,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec String new_name; std::shared_ptr settings; String cluster; + String storage_name; while (true) { @@ -130,6 +132,9 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -152,6 +157,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec query->new_name = std::move(new_name); query->settings = std::move(settings); query->to_roles = std::move(to_roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 0344fb99c04..577b6772ab2 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -414,6 +415,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::shared_ptr grantees; std::shared_ptr default_database; String cluster; + String storage_name; while (true) { @@ -480,6 +482,9 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } } + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -514,6 +519,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->settings = std::move(settings); query->grantees = std::move(grantees); query->default_database = std::move(default_database); + query->storage_name = std::move(storage_name); if (query->auth_data) query->children.push_back(query->auth_data); diff --git a/src/Parsers/Access/ParserDropAccessEntityQuery.cpp b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp index d21164bc1a2..7fd34a6f8e4 100644 --- a/src/Parsers/Access/ParserDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -53,6 +54,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & Strings names; std::shared_ptr row_policy_names; + String storage_name; String cluster; if ((type == AccessEntityType::USER) || (type == AccessEntityType::ROLE)) @@ -76,6 +78,9 @@ bool 
ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + if (ParserKeyword{"FROM"}.ignore(pos, expected)) + parseAccessStorageName(pos, expected, storage_name); + if (cluster.empty()) parseOnCluster(pos, expected, cluster); @@ -87,6 +92,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->cluster = std::move(cluster); query->names = std::move(names); query->row_policy_names = std::move(row_policy_names); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp b/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..798e200bfb3 --- /dev/null +++ b/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + bool parseEntityType(IParserBase::Pos & pos, Expected & expected, AccessEntityType & type) + { + for (auto i : collections::range(AccessEntityType::MAX)) + { + const auto & type_info = AccessEntityTypeInfo::get(i); + if (ParserKeyword{type_info.name}.ignore(pos, expected) + || (!type_info.alias.empty() && ParserKeyword{type_info.alias}.ignore(pos, expected))) + { + type = i; + return true; + } + } + return false; + } + + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } +} + + +bool ParserMoveAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"MOVE"}.ignore(pos, expected)) + return false; + + AccessEntityType type; + if (!parseEntityType(pos, expected, type)) + return false; + + Strings names; + std::shared_ptr row_policy_names; + String storage_name; + String cluster; + + if ((type == AccessEntityType::USER) || (type == AccessEntityType::ROLE)) + { + if (!parseUserNames(pos, expected, names)) + return false; + } + else if (type == AccessEntityType::ROW_POLICY) + { + ParserRowPolicyNames parser; + ASTPtr ast; + parser.allowOnCluster(); + if (!parser.parse(pos, ast, expected)) + return false; + row_policy_names = typeid_cast>(ast); + cluster = std::exchange(row_policy_names->cluster, ""); + } + else + { + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) + return false; + } + + if (!ParserKeyword{"TO"}.ignore(pos, expected) || !parseAccessStorageName(pos, expected, storage_name)) + return false; + + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + + auto query = std::make_shared(); + node = query; + + query->type = type; + query->cluster = std::move(cluster); + query->names = std::move(names); + query->row_policy_names = std::move(row_policy_names); + query->storage_name = std::move(storage_name); + + return true; +} +} diff --git a/src/Parsers/Access/ParserMoveAccessEntityQuery.h b/src/Parsers/Access/ParserMoveAccessEntityQuery.h new file mode 100644 index 00000000000..3accec883dd --- /dev/null +++ b/src/Parsers/Access/ParserMoveAccessEntityQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * MOVE {USER | ROLE | QUOTA | [ROW] POLICY | [SETTINGS] PROFILE} [IF EXISTS] name [,...] 
[ON [database.]table [,...]] TO storage_name + */ +class ParserMoveAccessEntityQuery : public IParserBase +{ +protected: + const char * getName() const override { return "MOVE access entity query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/Access/parseUserName.h b/src/Parsers/Access/parseUserName.h index fbad78e7bce..10c548d8826 100644 --- a/src/Parsers/Access/parseUserName.h +++ b/src/Parsers/Access/parseUserName.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 3a7e8790bb4..587908eb49c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -243,6 +243,38 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } +bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::StringLiteral) + return false; + + ReadBufferFromMemory in(pos->begin, pos->size()); + String s; + + if (!tryReadQuotedStringInto(s, in)) + { + expected.add(pos, "string literal"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal"); + return false; + } + + if (s.empty()) + { + expected.add(pos, "non-empty string literal"); + return false; + } + + node = std::make_shared(s); + ++pos; + return true; +} + + bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; @@ -1900,6 +1932,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } +bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::QuotedIdentifier && pos->type != TokenType::StringLiteral) + return false; + String s; + ReadBufferFromMemory in(pos->begin, pos->size()); + try + { + if (pos->type == TokenType::StringLiteral) + readQuotedStringWithSQLStyle(s, in); + else + readDoubleQuotedStringWithSQLStyle(s, in); + } + catch (const Exception &) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + auto literal = std::make_shared(s); + literal->begin = pos; + literal->end = ++pos; + node = literal; + return true; +} + + bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != TokenType::DoubleAt) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index cc88faf2653..2d06b26c6dc 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -34,6 +34,19 @@ protected: }; +/** An identifier for tables written as string literal, for example, 'mytable.avro' + */ +class ParserTableAsStringLiteralIdentifier : public IParserBase +{ +public: + explicit ParserTableAsStringLiteralIdentifier() {} + +protected: + const char * getName() const override { return "string literal table identifier"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** An identifier, possibly containing a dot, for example, x_yz123 or `something special` or Hits.EventTime, * possibly with UUID clause like `db name`.`table name` UUID 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' */ @@ -367,6 +380,21 @@ protected: }; +/** MySQL comment: + * CREATE TABLE t ( + * i INT PRIMARY KEY, + * 
first_name VARCHAR(255) COMMENT 'FIRST_NAME', + * last_name VARCHAR(255) COMMENT "LAST_NAME" + * ) + */ +class ParserMySQLComment : public IParserBase +{ +protected: + const char * getName() const override { return "MySQL comment parser"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** MySQL-style global variable: @@var */ class ParserMySQLGlobalVariable : public IParserBase diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index cd399531064..3ebf5571eae 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -865,6 +865,10 @@ public: if (!ParserKeyword("FROM").ignore(test_pos, test_expected)) return true; + // If there is a comma after 'from' then the first one was a name of a column + if (test_pos->type == TokenType::Comma) + return true; + /// If we parse a second FROM then the first one was a name of a column if (ParserKeyword("FROM").ignore(test_pos, test_expected)) return true; @@ -2332,12 +2336,14 @@ const std::vector> ParserExpressionImpl::o {":", Operator("if", 3, 3, OperatorType::FinishIf)}, {"OR", Operator("or", 3, 2, OperatorType::Mergeable)}, {"AND", Operator("and", 4, 2, OperatorType::Mergeable)}, + {"IS NOT DISTINCT FROM", Operator("isNotDistinctFrom", 6, 2)}, {"IS NULL", Operator("isNull", 6, 1, OperatorType::IsNull)}, {"IS NOT NULL", Operator("isNotNull", 6, 1, OperatorType::IsNull)}, {"BETWEEN", Operator("", 7, 0, OperatorType::StartBetween)}, {"NOT BETWEEN", Operator("", 7, 0, OperatorType::StartNotBetween)}, {"==", Operator("equals", 9, 2, OperatorType::Comparison)}, {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)}, + {"<=>", Operator("isNotDistinctFrom", 9, 2, OperatorType::Comparison)}, {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)}, {"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)}, {">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)}, diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index bf4d6fc9dec..a494a528cd2 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -118,9 +118,7 @@ IAST::Hash IAST::getTreeHash() const { SipHash hash_state; updateTreeHash(hash_state); - IAST::Hash res; - hash_state.get128(res); - return res; + return getSipHash128AsPair(hash_state); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index d217876459f..812fd082476 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ public: /** Get hash code, identifying this element and its subtree. */ - using Hash = std::pair; + using Hash = CityHash_v1_0_2::uint128; Hash getTreeHash() const; void updateTreeHash(SipHash & hash_state) const; virtual void updateTreeHashImpl(SipHash & hash_state) const; @@ -197,6 +198,7 @@ public: IdentifierQuotingStyle identifier_quoting_style; bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. 
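
The `ExpressionListParsers` hunk above wires the SQL `IS NOT DISTINCT FROM` syntax and MySQL's `<=>` token to the `isNotDistinctFrom` function. As a standalone sketch of the NULL-safe-equality semantics being mapped (illustrative only, not ClickHouse code; `std::optional` stands in for Nullable values):

```cpp
#include <iostream>
#include <optional>

// NULL-safe equality: two NULLs compare equal, NULL vs. non-NULL compares not
// equal, whereas ordinary equality would yield NULL in both of those cases.
template <typename T>
bool isNotDistinctFrom(const std::optional<T> & lhs, const std::optional<T> & rhs)
{
    if (!lhs.has_value() || !rhs.has_value())
        return lhs.has_value() == rhs.has_value();
    return *lhs == *rhs;
}

int main()
{
    std::cout << isNotDistinctFrom<int>(std::nullopt, std::nullopt) << '\n'; // 1
    std::cout << isNotDistinctFrom<int>(42, std::nullopt) << '\n';           // 0
    std::cout << isNotDistinctFrom<int>(42, 42) << '\n';                     // 1
}
```
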
+ LiteralEscapingStyle literal_escaping_style; explicit FormatSettings( WriteBuffer & ostr_, @@ -204,7 +206,8 @@ public: bool hilite_ = false, bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, - bool show_secrets_ = true) + bool show_secrets_ = true, + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -212,6 +215,7 @@ public: , identifier_quoting_style(identifier_quoting_style_) , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') + , literal_escaping_style(literal_escaping_style_) { } @@ -223,6 +227,7 @@ public: , identifier_quoting_style(other.identifier_quoting_style) , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) + , literal_escaping_style(other.literal_escaping_style) { } @@ -302,6 +307,7 @@ public: Alter, Grant, Revoke, + Move, System, Set, Use, diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07..72e25cc3cf9 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -31,10 +31,10 @@ protected: not_endswith, endswith_cs, not_endswith_cs, - equal, //=~ - not_equal,//!~ - equal_cs, //= - not_equal_cs,//!= + equal, /// =~ + not_equal, /// !~ + equal_cs, /// = + not_equal_cs, /// != has, not_has, has_all, @@ -49,10 +49,10 @@ protected: not_hassuffix, hassuffix_cs, not_hassuffix_cs, - in_cs, //in - not_in_cs, //!in - in, //in~ - not_in ,//!in~ + in_cs, /// in + not_in_cs, /// !in + in, /// in~ + not_in, /// !in~ matches_regex, startswith, not_startswith, diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index be67807ad8f..2871df77d1a 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -375,9 +375,14 @@ Token Lexer::nextTokenImpl() return Token(TokenType::NotEquals, token_begin, ++pos); return Token(TokenType::ErrorSingleExclamationMark, token_begin, pos); } - case '<': /// <, <=, <> + case '<': /// <, <=, <>, <=> { ++pos; + if (pos + 1 < end && *pos == '=' && *(pos + 1) == '>') + { + pos += 2; + return Token(TokenType::Spaceship, token_begin, pos); + } if (pos < end && *pos == '=') return Token(TokenType::LessOrEquals, token_begin, ++pos); if (pos < end && *pos == '>') diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index 0c439ca0677..e228dba6c1f 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(Spaceship) /** <=>. Used in MySQL for NULL-safe equality comparison. */ \ M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ diff --git a/src/Parsers/LiteralEscapingStyle.h b/src/Parsers/LiteralEscapingStyle.h new file mode 100644 index 00000000000..10d4d84a85d --- /dev/null +++ b/src/Parsers/LiteralEscapingStyle.h @@ -0,0 +1,14 @@ +#pragma once + + +namespace DB +{ + +/// Method to escape single quotes. 
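
The `LiteralEscapingStyle` enum that begins in the hunk below distinguishes regular escaping from PostgreSQL-style escaping of single quotes. A minimal sketch of the two behaviours (the `escapeSingleQuotes` helper is hypothetical and only mirrors the enum's comments, it is not the ClickHouse implementation):

```cpp
#include <iostream>
#include <string>

enum class LiteralEscapingStyle { Regular, PostgreSQL };

// Regular: backslash-escape both '\' and '\''.
// PostgreSQL: leave backslashes alone and double the single quotes.
std::string escapeSingleQuotes(const std::string & s, LiteralEscapingStyle style)
{
    std::string out;
    for (char c : s)
    {
        if (c == '\'')
            out += (style == LiteralEscapingStyle::Regular) ? "\\'" : "''";
        else if (c == '\\' && style == LiteralEscapingStyle::Regular)
            out += "\\\\";
        else
            out += c;
    }
    return out;
}

int main()
{
    std::cout << escapeSingleQuotes("it's a \\ test", LiteralEscapingStyle::Regular) << '\n';    // it\'s a \\ test
    std::cout << escapeSingleQuotes("it's a \\ test", LiteralEscapingStyle::PostgreSQL) << '\n'; // it''s a \ test
}
```
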
+enum class LiteralEscapingStyle +{ + Regular, /// Escape backslashes with backslash (\\) and quotes with backslash (\') + PostgreSQL, /// Do not escape backslashes (\), escape quotes with quote ('') +}; + +} diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp index e585dcb670c..e5f2b7870e2 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique()), OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), - OptionDescribe("COMMENT", "comment", std::make_unique()), + OptionDescribe("COMMENT", "comment", std::make_unique()), OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), OptionDescribe("CHARSET", "charset", std::make_unique()), OptionDescribe("COLLATE", "collate", std::make_unique()), diff --git a/src/Parsers/ParserAlterNamedCollectionQuery.cpp b/src/Parsers/ParserAlterNamedCollectionQuery.cpp index 9108747ad82..8fb84f86246 100644 --- a/src/Parsers/ParserAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ParserAlterNamedCollectionQuery.cpp @@ -13,8 +13,9 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod { ParserKeyword s_alter("ALTER"); ParserKeyword s_collection("NAMED COLLECTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_delete("DELETE"); - ParserIdentifier name_p; ParserSetQuery set_p; ParserToken s_comma(TokenType::Comma); @@ -32,10 +33,13 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod if (!s_collection.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index d2ae7f972b7..81954e3c247 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -66,6 +66,8 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected { if (index->type && index->type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; + else if (index->type && index->type->name == "usearch") + index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } @@ -80,6 +82,7 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect node = query; ParserKeyword s_create("CREATE"); + ParserKeyword s_unique("UNIQUE"); ParserKeyword s_index("INDEX"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_on("ON"); @@ -91,10 +94,14 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect String cluster_str; bool if_not_exists = false; + bool unique = false; if (!s_create.ignore(pos, expected)) return false; + if (s_unique.ignore(pos, expected)) + unique = true; + if (!s_index.ignore(pos, expected)) return false; @@ -131,6 +138,7 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect query->children.push_back(index_decl); query->if_not_exists = if_not_exists; + 
query->unique = unique; query->cluster = cluster_str; if (query->database) diff --git a/src/Parsers/ParserCreateIndexQuery.h b/src/Parsers/ParserCreateIndexQuery.h index 3cb91cd03c6..701586d6e11 100644 --- a/src/Parsers/ParserCreateIndexQuery.h +++ b/src/Parsers/ParserCreateIndexQuery.h @@ -6,7 +6,7 @@ namespace DB { /** Query like this: - * CREATE INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value + * CREATE [UNIQUE] INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value */ class ParserCreateIndexQuery : public IParserBase diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 415d3321eb5..44f375adb65 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -148,6 +149,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe { if (index->type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; + else if (index->type->name == "usearch") + index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } @@ -222,17 +225,69 @@ bool ParserProjectionDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & return true; } +bool ParserForeignKeyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_references("REFERENCES"); + ParserCompoundIdentifier table_name_p(true, true); + ParserExpression expression_p; + + ASTPtr name; + ASTPtr expr; + + if (!expression_p.parse(pos, expr, expected)) + return false; + + if (!s_references.ignore(pos, expected)) + return false; + + if (!table_name_p.parse(pos, name, expected)) + return false; + + if (!expression_p.parse(pos, expr, expected)) + return false; + + ParserKeyword s_on("ON"); + while (s_on.ignore(pos, expected)) + { + ParserKeyword s_delete("DELETE"); + ParserKeyword s_update("UPDATE"); + + if (!s_delete.ignore(pos, expected) && !s_update.ignore(pos, expected)) + return false; + + ParserKeyword s_restrict("RESTRICT"); + ParserKeyword s_cascade("CASCADE"); + ParserKeyword s_set_null("SET NULL"); + ParserKeyword s_no_action("NO ACTION"); + ParserKeyword s_set_default("SET DEFAULT"); + + if (!s_restrict.ignore(pos, expected) && !s_cascade.ignore(pos, expected) && + !s_set_null.ignore(pos, expected) && !s_no_action.ignore(pos, expected) && + !s_set_default.ignore(pos, expected)) + { + return false; + } + } + + auto foreign_key = std::make_shared(); + foreign_key->name = "Foreign Key"; + node = foreign_key; + + return true; +} bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_index("INDEX"); ParserKeyword s_constraint("CONSTRAINT"); ParserKeyword s_projection("PROJECTION"); + ParserKeyword s_foreign_key("FOREIGN KEY"); ParserKeyword s_primary_key("PRIMARY KEY"); ParserIndexDeclaration index_p; ParserConstraintDeclaration constraint_p; ParserProjectionDeclaration projection_p; + ParserForeignKeyDeclaration foreign_key_p; ParserColumnDeclaration column_p{true, true}; ParserExpression primary_key_p; @@ -258,6 +313,11 @@ bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expecte if (!primary_key_p.parse(pos, new_node, expected)) return false; } + else if (s_foreign_key.ignore(pos, expected)) + { + if (!foreign_key_p.parse(pos, new_node, expected)) + return false; + } else { if 
(!column_p.parse(pos, new_node, expected)) @@ -321,6 +381,11 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E constraints->children.push_back(elem); else if (elem->as()) projections->children.push_back(elem); + else if (elem->as()) + { + /// Ignore the foreign key node + continue; + } else if (elem->as() || elem->as()) { if (primary_key) @@ -461,6 +526,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) break; } + // If any part of storage definition is found create storage node if (!storage_like) return false; @@ -502,7 +568,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_table("TABLE"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_from("FROM"); ParserKeyword s_on("ON"); ParserToken s_dot(TokenType::Dot); @@ -740,7 +806,7 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ParserKeyword s_create("CREATE"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_live("LIVE"); @@ -878,7 +944,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_window("WINDOW"); @@ -1015,12 +1081,17 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->if_not_exists = if_not_exists; query->is_window_view = true; - StorageID table_id = table->as()->getTableId(); - query->setDatabase(table_id.database_name); - query->setTable(table_id.table_name); - query->uuid = table_id.uuid; + auto * table_id = table->as(); + query->database = table_id->getDatabase(); + query->table = table_id->getTable(); + query->uuid = table_id->uuid; query->cluster = cluster_str; + if (query->database) + query->children.push_back(query->database); + if (query->table) + query->children.push_back(query->table); + if (to_table) query->to_table_id = to_table->as()->getTableId(); @@ -1265,7 +1336,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_create("CREATE"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); + ParserCompoundIdentifier to_table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ false); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_materialized("MATERIALIZED"); @@ -1421,15 +1493,17 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword 
s_create("CREATE"); - ParserKeyword s_attach("ATTACH"); ParserKeyword s_named_collection("NAMED COLLECTION"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_as("AS"); - - ParserToken s_comma(TokenType::Comma); ParserIdentifier name_p; + ParserToken s_comma(TokenType::Comma); + + String cluster_str; + bool if_not_exists = false; ASTPtr collection_name; - String cluster_str; if (!s_create.ignore(pos, expected)) return false; @@ -1437,10 +1511,13 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (!s_named_collection.ignore(pos, expected)) return false; + if (s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; @@ -1465,7 +1542,9 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec auto query = std::make_shared(); tryGetIdentifierNameInto(collection_name, query->collection_name); + query->if_not_exists = if_not_exists; query->changes = changes; + query->cluster = std::move(cluster_str); node = query; return true; @@ -1480,7 +1559,7 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E ParserKeyword s_dictionary("DICTIONARY"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_on("ON"); - ParserCompoundIdentifier dict_name_p(true, true); + ParserCompoundIdentifier dict_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserToken s_left_paren(TokenType::OpeningRoundBracket); ParserToken s_right_paren(TokenType::ClosingRoundBracket); ParserToken s_dot(TokenType::Dot); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 09935e2b608..4062ed25c6b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -403,6 +403,13 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserForeignKeyDeclaration : public IParserBase +{ +protected: + const char * getName() const override { return "foreign key declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserTablePropertyDeclaration : public IParserBase { protected: @@ -548,6 +555,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/// CREATE NAMED COLLECTION name [ON CLUSTER cluster] class ParserCreateNamedCollectionQuery : public IParserBase { protected: diff --git a/src/Parsers/ParserDropNamedCollectionQuery.cpp b/src/Parsers/ParserDropNamedCollectionQuery.cpp index 1ea8aa6d75d..b0b010b5ef6 100644 --- a/src/Parsers/ParserDropNamedCollectionQuery.cpp +++ b/src/Parsers/ParserDropNamedCollectionQuery.cpp @@ -12,6 +12,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node ParserKeyword s_drop("DROP"); ParserKeyword s_collection("NAMED COLLECTION"); ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserIdentifier name_p; String cluster_str; @@ -31,7 +32,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git 
a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 77e7b58e6b1..7ed69940bed 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -54,6 +55,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserCreateIndexQuery create_index_p; ParserDropIndexQuery drop_index_p; ParserDropAccessEntityQuery drop_access_entity_p; + ParserMoveAccessEntityQuery move_access_entity_p; ParserGrantQuery grant_p; ParserSetRoleQuery set_role_p; ParserExternalDDLQuery external_ddl_p; @@ -80,6 +82,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || create_index_p.parse(pos, node, expected) || drop_index_p.parse(pos, node, expected) || drop_access_entity_p.parse(pos, node, expected) + || move_access_entity_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected) || external_ddl_p.parse(pos, node, expected) || transaction_control_p.parse(pos, node, expected) diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index a2391495071..9a71bc222b5 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowTablesQuery show_tables_p; ParserShowColumnsQuery show_columns_p; ParserShowEnginesQuery show_engine_p; + ParserShowFunctionsQuery show_functions_p; ParserShowIndexesQuery show_indexes_p; ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; @@ -71,6 +73,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || show_tables_p.parse(pos, query, expected) || show_columns_p.parse(pos, query, expected) || show_engine_p.parse(pos, query, expected) + || show_functions_p.parse(pos, query, expected) || show_indexes_p.parse(pos, query, expected) || table_p.parse(pos, query, expected) || describe_cache_p.parse(pos, query, expected) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 1c48f773823..341c1ef60b4 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -292,6 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// This is needed for TOP expression, because it can also use WITH TIES. bool limit_with_ties_occured = false; + bool has_offset_clause = false; + bool offset_clause_has_sql_standard_row_or_rows = false; /// OFFSET offset_row_count {ROW | ROWS} + /// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list if (s_limit.ignore(pos, expected)) { @@ -316,6 +319,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!exp_elem.parse(pos, limit_offset, expected)) return false; + + has_offset_clause = true; } else if (s_with_ties.ignore(pos, expected)) { @@ -351,60 +356,65 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_offset.ignore(pos, expected)) { - /// OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES} - bool offset_with_fetch_maybe = false; + /// OFFSET without LIMIT + + has_offset_clause = true; if (!exp_elem.parse(pos, limit_offset, expected)) return false; + /// SQL standard OFFSET N ROW[S] ... 
+ + if (s_row.ignore(pos, expected)) + offset_clause_has_sql_standard_row_or_rows = true; + + if (s_rows.ignore(pos, expected)) + { + if (offset_clause_has_sql_standard_row_or_rows) + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); + + offset_clause_has_sql_standard_row_or_rows = true; + } + } + + /// SQL standard FETCH (either following SQL standard OFFSET or following ORDER BY) + if ((!has_offset_clause || offset_clause_has_sql_standard_row_or_rows) + && s_fetch.ignore(pos, expected)) + { + /// FETCH clause must exist with "ORDER BY" + if (!order_expression_list) + throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); + + if (s_first.ignore(pos, expected)) + { + if (s_next.ignore(pos, expected)) + throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); + } + else if (!s_next.ignore(pos, expected)) + return false; + + if (!exp_elem.parse(pos, limit_length, expected)) + return false; + if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); - offset_with_fetch_maybe = true; } - else if (s_rows.ignore(pos, expected)) + else if (!s_rows.ignore(pos, expected)) + return false; + + if (s_with_ties.ignore(pos, expected)) { - offset_with_fetch_maybe = true; + select_query->limit_with_ties = true; } - - if (offset_with_fetch_maybe && s_fetch.ignore(pos, expected)) + else if (s_only.ignore(pos, expected)) { - /// OFFSET FETCH clause must exists with "ORDER BY" - if (!order_expression_list) - throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); - - if (s_first.ignore(pos, expected)) - { - if (s_next.ignore(pos, expected)) - throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); - } - else if (!s_next.ignore(pos, expected)) - return false; - - if (!exp_elem.parse(pos, limit_length, expected)) - return false; - - if (s_row.ignore(pos, expected)) - { - if (s_rows.ignore(pos, expected)) - throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); - } - else if (!s_rows.ignore(pos, expected)) - return false; - - if (s_with_ties.ignore(pos, expected)) - { - select_query->limit_with_ties = true; - } - else if (s_only.ignore(pos, expected)) - { - select_query->limit_with_ties = false; - } - else - { - return false; - } + select_query->limit_with_ties = false; + } + else + { + return false; } } diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 727d037112f..4df74c2dd82 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -215,7 +215,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) 
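
The `ParserSelectQuery` changes above relax the SQL-standard clauses so that `OFFSET n ROW[S]` may appear without `LIMIT`, and `FETCH {FIRST | NEXT} ...` may follow either that OFFSET or `ORDER BY` directly. A rough sketch of how the relaxed grammar could be exercised, assuming ClickHouse's parser headers are available and mirroring the `parseQuery`/`formatAST` calls used by the fuzzers later in this diff (the sample queries themselves are arbitrary):

```cpp
#include <iostream>
#include <string>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <IO/WriteBufferFromString.h>

int main()
{
    for (const std::string query : {
             "SELECT * FROM t ORDER BY x OFFSET 10 ROWS FETCH FIRST 5 ROWS ONLY",
             "SELECT * FROM t ORDER BY x FETCH NEXT 5 ROWS WITH TIES"})
    {
        DB::ParserQueryWithOutput parser(query.data() + query.size());
        DB::ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 1000);

        DB::WriteBufferFromOwnString out;
        DB::formatAST(*ast, out);
        std::cout << out.str() << std::endl; // prints the reassembled query
    }
}
```

Both statements should now parse: the first uses the SQL-standard OFFSET followed by FETCH, the second attaches FETCH directly to ORDER BY, which the previous code rejected.
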
- else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -280,7 +280,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( node = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/ParserShowFunctionsQuery.cpp b/src/Parsers/ParserShowFunctionsQuery.cpp new file mode 100644 index 00000000000..524d936c4f1 --- /dev/null +++ b/src/Parsers/ParserShowFunctionsQuery.cpp @@ -0,0 +1,35 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +bool ParserShowFunctionsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr like; + + auto query = std::make_shared(); + if (!ParserKeyword("SHOW FUNCTIONS").ignore(pos, expected)) + return false; + + if (bool insensitive = ParserKeyword("ILIKE").ignore(pos, expected); insensitive || ParserKeyword("LIKE").ignore(pos, expected)) + { + if (insensitive) + query->case_insensitive_like = true; + + if (!ParserStringLiteral().parse(pos, like, expected)) + return false; + } + + if (like) + query->like = like->as().value.safeGet(); + node = query; + + return true; +} + +} diff --git a/src/Parsers/ParserShowFunctionsQuery.h b/src/Parsers/ParserShowFunctionsQuery.h new file mode 100644 index 00000000000..25241d180db --- /dev/null +++ b/src/Parsers/ParserShowFunctionsQuery.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +namespace DB +{ + +/** Parses queries of the form + * SHOW FUNCTIONS [LIKE | ILIKE ''] + */ +class ParserShowFunctionsQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW FUNCTIONS query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index 3540a6d3fc8..e3728eb2cd6 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -36,7 +36,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_where("WHERE"); ParserKeyword s_limit("LIMIT"); ParserStringLiteral like_p; - ParserIdentifier name_p; + ParserIdentifier name_p(true); ParserExpressionWithOptionalAlias exp_elem(false); ASTPtr like; @@ -174,7 +174,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - tryGetIdentifierNameInto(database, query->from); + query->set(query->from, database); if (like) query->like = like->as().value.safeGet(); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 48dbe60e241..36122a48804 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -379,6 +379,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: 
if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); @@ -401,6 +403,24 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } case Type::DROP_FILESYSTEM_CACHE: + { + ParserLiteral path_parser; + ASTPtr ast; + if (path_parser.parse(pos, ast, expected)) + { + res->filesystem_cache_name = ast->as()->value.safeGet(); + if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) + { + res->key_to_drop = ast->as()->name(); + if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) + res->offset_to_drop = ast->as()->value.safeGet(); + } + } + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + break; + } + case Type::SYNC_FILESYSTEM_CACHE: { ParserLiteral path_parser; ASTPtr ast; @@ -422,6 +442,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->schema_cache_storage = "HDFS"; else if (ParserKeyword{"URL"}.ignore(pos, expected)) res->schema_cache_storage = "URL"; + else if (ParserKeyword{"AZURE"}.ignore(pos, expected)) + res->schema_cache_storage = "AZURE"; else return false; } @@ -442,6 +464,81 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } + case Type::START_LISTEN: + case Type::STOP_LISTEN: + { + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + + auto parse_server_type = [&](ServerType::Type & type, std::string & custom_name) -> bool + { + type = ServerType::Type::END; + custom_name = ""; + + for (const auto & cur_type : magic_enum::enum_values()) + { + if (ParserKeyword{ServerType::serverTypeToString(cur_type)}.ignore(pos, expected)) + { + type = cur_type; + break; + } + } + + if (type == ServerType::Type::END) + return false; + + if (type == ServerType::CUSTOM) + { + ASTPtr ast; + + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + + custom_name = ast->as().value.get(); + } + + return true; + }; + + ServerType::Type base_type; + std::string base_custom_name; + + ServerType::Types exclude_type; + ServerType::CustomNames exclude_custom_names; + + if (!parse_server_type(base_type, base_custom_name)) + return false; + + if (ParserKeyword{"EXCEPT"}.ignore(pos, expected)) + { + if (base_type != ServerType::Type::QUERIES_ALL && + base_type != ServerType::Type::QUERIES_DEFAULT && + base_type != ServerType::Type::QUERIES_CUSTOM) + return false; + + ServerType::Type current_type; + std::string current_custom_name; + + while (true) + { + if (!exclude_type.empty() && !ParserToken(TokenType::Comma).ignore(pos, expected)) + break; + + if (!parse_server_type(current_type, current_custom_name)) + return false; + + exclude_type.insert(current_type); + + if (current_type == ServerType::Type::CUSTOM) + exclude_custom_names.insert(current_custom_name); + } + } + + res->server_type = ServerType(base_type, base_custom_name, exclude_type, exclude_custom_names); + + break; + } + default: { if (!parseQueryWithOnCluster(res, pos, expected)) diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 08b6f77fafa..b3ae6ca0bb9 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -24,6 +24,8 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword).parse(pos, res->subquery, expected) && 
!ParserWithOptionalAlias(std::make_unique(false, true), allow_alias_without_as_keyword).parse(pos, res->table_function, expected) && !ParserWithOptionalAlias(std::make_unique(true, true), allow_alias_without_as_keyword) + .parse(pos, res->database_and_table_name, expected) + && !ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword) .parse(pos, res->database_and_table_name, expected)) return false; diff --git a/src/Parsers/ParserUseQuery.cpp b/src/Parsers/ParserUseQuery.cpp index 40511d8c142..350e573281c 100644 --- a/src/Parsers/ParserUseQuery.cpp +++ b/src/Parsers/ParserUseQuery.cpp @@ -11,7 +11,7 @@ namespace DB bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_use("USE"); - ParserIdentifier name_p; + ParserIdentifier name_p{/*allow_query_parameter*/ true}; if (!s_use.ignore(pos, expected)) return false; @@ -21,7 +21,7 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto query = std::make_shared(); - tryGetIdentifierNameInto(database, query->database); + query->set(query->database, database); node = query; return true; diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index 1282c12cce6..b8679cc3b96 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -31,7 +31,7 @@ private: void visitQueryParameter(const ASTQueryParameter & query_parameter) { - query_parameters[query_parameter.name]= query_parameter.type; + query_parameters[query_parameter.name] = query_parameter.type; } }; diff --git a/src/Parsers/examples/CMakeLists.txt b/src/Parsers/examples/CMakeLists.txt index 82ca7bc0688..261f234081c 100644 --- a/src/Parsers/examples/CMakeLists.txt +++ b/src/Parsers/examples/CMakeLists.txt @@ -3,8 +3,8 @@ set(SRCS) clickhouse_add_executable(lexer lexer.cpp ${SRCS}) target_link_libraries(lexer PRIVATE clickhouse_parsers) -clickhouse_add_executable(select_parser select_parser.cpp ${SRCS}) -target_link_libraries(select_parser PRIVATE clickhouse_parsers) +clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp") +target_link_libraries(select_parser PRIVATE dbms) -clickhouse_add_executable(create_parser create_parser.cpp ${SRCS}) -target_link_libraries(create_parser PRIVATE clickhouse_parsers) +clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp") +target_link_libraries(create_parser PRIVATE dbms) diff --git a/src/Parsers/fuzzers/CMakeLists.txt b/src/Parsers/fuzzers/CMakeLists.txt index c3aa21e2a04..903319d733c 100644 --- a/src/Parsers/fuzzers/CMakeLists.txt +++ b/src/Parsers/fuzzers/CMakeLists.txt @@ -1,11 +1,11 @@ clickhouse_add_executable(lexer_fuzzer lexer_fuzzer.cpp ${SRCS}) -target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) +target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers) clickhouse_add_executable(select_parser_fuzzer select_parser_fuzzer.cpp ${SRCS}) -target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) +target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers dbms) clickhouse_add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS}) -target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) +target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers dbms) add_subdirectory(codegen_fuzzer) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt 
b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 727c49cfc4d..20fd951d390 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -41,5 +41,10 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") +# contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof] +target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof) + +target_link_libraries(protoc ch_contrib::fuzzer) + target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g index 79fd775b1da..a2068d391cd 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g +++ b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g @@ -66,6 +66,7 @@ $1 " as " $2 ; "=="; "!="; "<>"; +"<=>" "<"; "<="; ">"; diff --git a/src/Parsers/fuzzers/create_parser_fuzzer.cpp b/src/Parsers/fuzzers/create_parser_fuzzer.cpp index 13cb1dfd36e..854885ad33b 100644 --- a/src/Parsers/fuzzers/create_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/create_parser_fuzzer.cpp @@ -8,27 +8,28 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - std::string input = std::string(reinterpret_cast(data), size); + try + { + std::string input = std::string(reinterpret_cast(data), size); - DB::ParserCreateQuery parser; - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000); + DB::ParserCreateQuery parser; + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000); - const UInt64 max_ast_depth = 1000; - ast->checkDepth(max_ast_depth); + const UInt64 max_ast_depth = 1000; + ast->checkDepth(max_ast_depth); - const UInt64 max_ast_elements = 50000; - ast->checkSize(max_ast_elements); + const UInt64 max_ast_elements = 50000; + ast->checkSize(max_ast_elements); - DB::WriteBufferFromOwnString wb; - DB::formatAST(*ast, wb); + DB::WriteBufferFromOwnString wb; + DB::formatAST(*ast, wb); - std::cerr << wb.str() << std::endl; + std::cerr << wb.str() << std::endl; + } + catch (...) + { + } return 0; } -catch (...) 
-{ - return 1; -} diff --git a/src/Parsers/fuzzers/lexer_fuzzer.cpp b/src/Parsers/fuzzers/lexer_fuzzer.cpp index d7dd2cfe970..0f9471a8e4f 100644 --- a/src/Parsers/fuzzers/lexer_fuzzer.cpp +++ b/src/Parsers/fuzzers/lexer_fuzzer.cpp @@ -8,21 +8,27 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { - DB::String query; - DB::ReadBufferFromMemory in(data, size); - readStringUntilEOF(query, in); - - DB::Lexer lexer(query.data(), query.data() + query.size()); - - while (true) + try { - DB::Token token = lexer.nextToken(); + DB::String query; + DB::ReadBufferFromMemory in(data, size); + readStringUntilEOF(query, in); - if (token.isEnd()) - break; + DB::Lexer lexer(query.data(), query.data() + query.size()); - if (token.isError()) - return 1; + while (true) + { + DB::Token token = lexer.nextToken(); + + if (token.isEnd()) + break; + + if (token.isError()) + return 0; + } + } + catch (...) + { } return 0; diff --git a/src/Parsers/fuzzers/select_parser_fuzzer.cpp b/src/Parsers/fuzzers/select_parser_fuzzer.cpp index 3f712834c55..ae490ed4e56 100644 --- a/src/Parsers/fuzzers/select_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/select_parser_fuzzer.cpp @@ -7,29 +7,30 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - std::string input = std::string(reinterpret_cast(data), size); + try + { + std::string input = std::string(reinterpret_cast(data), size); - DB::ParserQueryWithOutput parser(input.data() + input.size()); + DB::ParserQueryWithOutput parser(input.data() + input.size()); - const UInt64 max_parser_depth = 1000; - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth); + const UInt64 max_parser_depth = 1000; + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth); - const UInt64 max_ast_depth = 1000; - ast->checkDepth(max_ast_depth); + const UInt64 max_ast_depth = 1000; + ast->checkDepth(max_ast_depth); - const UInt64 max_ast_elements = 50000; - ast->checkSize(max_ast_elements); + const UInt64 max_ast_elements = 50000; + ast->checkSize(max_ast_elements); - DB::WriteBufferFromOwnString wb; - DB::formatAST(*ast, wb); + DB::WriteBufferFromOwnString wb; + DB::formatAST(*ast, wb); - std::cerr << wb.str() << std::endl; + std::cerr << wb.str() << std::endl; + } + catch (...) + { + } return 0; } -catch (...) 
-{ - return 1; -} diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index da793a4d6bd..a6806a628bf 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -42,6 +42,7 @@ const std::unordered_set keywords "<>", "=", "==", + "<=>", ">", ">=", "?", diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index d77ae8d3a27..18e91c533e0 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -359,11 +359,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nORDER BY LastName DESC" }, { - "Customers | order by Age desc , FirstName asc ", + "Customers | order by Age desc, FirstName asc ", "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" }, { - "Customers | order by Age asc , FirstName desc", + "Customers | order by Age asc, FirstName desc", "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" }, { diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 9f6c22f90f3..7cce495dfb8 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1047,7 +1047,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_) + SelectQueryOptions & select_query_options_) : query_tree(query_tree_) , select_query_options(select_query_options_) , planner_context(buildPlannerContext(query_tree, select_query_options, std::make_shared())) @@ -1055,7 +1055,7 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, GlobalPlannerContextPtr global_planner_context_) : query_tree(query_tree_) , select_query_options(select_query_options_) @@ -1064,7 +1064,7 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, PlannerContextPtr planner_context_) : query_tree(query_tree_) , select_query_options(select_query_options_) diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h index 783a07f6e99..f8d151365cf 100644 --- a/src/Planner/Planner.h +++ b/src/Planner/Planner.h @@ -22,16 +22,16 @@ class Planner public: /// Initialize planner with query tree after analysis phase Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_); + SelectQueryOptions & select_query_options_); /// Initialize planner with query tree after query analysis phase and global planner context Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, GlobalPlannerContextPtr global_planner_context_); /// Initialize planner with query tree after query analysis phase and planner context Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, PlannerContextPtr planner_context_); const QueryPlan & getQueryPlan() const @@ -66,7 +66,7 @@ private: void buildPlanForQueryNode(); QueryTreeNodePtr query_tree; - SelectQueryOptions select_query_options; + SelectQueryOptions & select_query_options; PlannerContextPtr planner_context; QueryPlan query_plan; StorageLimitsList storage_limits; diff --git 
a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7575828e64d..9b6034cd0de 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -100,8 +100,9 @@ public: if (isNameOfInFunction(function_node.getFunctionName())) { + const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0); const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); - in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node); + in_function_second_argument_node_name = planner_context.createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); } WriteBufferFromOwnString buffer; @@ -165,7 +166,7 @@ public: case QueryTreeNodeType::LAMBDA: { auto lambda_hash = node->getTreeHash(); - result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second); + result = "__lambda_" + toString(lambda_hash); break; } default: @@ -628,8 +629,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma auto in_first_argument = function_node.getArguments().getNodes().at(0); auto in_second_argument = function_node.getArguments().getNodes().at(1); - //auto set_key = planner_context->createSetKey(in_second_argument); - DataTypes set_element_types; auto in_second_argument_node_type = in_second_argument->getNodeType(); @@ -665,7 +664,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma PreparedSets::toString(set_key, set_element_types)); ColumnWithTypeAndName column; - column.name = planner_context->createSetKey(in_second_argument); + column.name = planner_context->createSetKey(in_first_argument->getResultType(), in_second_argument); column.type = std::make_shared(); bool set_is_created = set->get() != nullptr; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 3c75d4fbea8..0fde034b87a 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -112,10 +113,24 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } -PlannerContext::SetKey PlannerContext::createSetKey(const QueryTreeNodePtr & set_source_node) +PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { - auto set_source_hash = set_source_node->getTreeHash(); - return "__set_" + toString(set_source_hash.first) + '_' + toString(set_source_hash.second); + const auto set_source_hash = set_source_node->getTreeHash(); + if (set_source_node->as()) + { + /* We need to hash the type of the left operand because we can build different sets for different types. + * (It's done for performance reasons. It's cheaper to convert a small set of values from literal to the type of the left operand.) + * + * For example in expression `(a :: Decimal(9, 1) IN (1.0, 2.5)) AND (b :: Decimal(9, 0) IN (1, 2.5))` + * we need to build two different sets: + * - `{1, 2.5} :: Set(Decimal(9, 1))` for a + * - `{1} :: Set(Decimal(9, 0))` for b (2.5 omitted because it's not representable as Decimal(9, 0)). + */ + return "__set_" + left_operand_type->getName() + '_' + toString(set_source_hash); + } + + /// For other cases we will cast left operand to the type of the set source, so no difference in types.
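To make the keying scheme described in the comment above concrete, here is a minimal self-contained sketch; the helper name and the string stand-in for the tree hash are hypothetical and are not part of this patch or the ClickHouse API:

```
#include <iostream>
#include <string>

/// Hypothetical miniature of the createSetKey idea: for a constant set source,
/// the key embeds the left operand type in front of the source hash.
static std::string makeSetKey(const std::string & left_operand_type, const std::string & source_hash)
{
    return "__set_" + left_operand_type + '_' + source_hash;
}

int main()
{
    /// Two IN clauses over the same literal tuple but different left operand types
    /// (Decimal(9, 1) vs Decimal(9, 0)) must not share a set, so the type is part of the key.
    const std::string tuple_hash = "feedc0de"; /// placeholder for the real tree hash
    std::cout << makeSetKey("Decimal(9, 1)", tuple_hash) << '\n'; /// __set_Decimal(9, 1)_feedc0de
    std::cout << makeSetKey("Decimal(9, 0)", tuple_hash) << '\n'; /// __set_Decimal(9, 0)_feedc0de
}
```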
+ return "__set_" + toString(set_source_hash); } } diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index aceb313d4b5..44b24d09c5d 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -132,7 +132,7 @@ public: using SetKey = std::string; /// Create set key for set source node - static SetKey createSetKey(const QueryTreeNodePtr & set_source_node); + static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { return prepared_sets; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 5d8f8ca8741..c95671da6be 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -101,7 +101,7 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, } throw Exception(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + "{}: Not enough privileges. To execute this query, it's necessary to have the grant SELECT for at least one column on {}", query_context->getUserName(), storage_id.getFullTableName()); } @@ -113,6 +113,20 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } +bool shouldIgnoreQuotaAndLimits(const TableNode & table_node) +{ + const auto & storage_id = table_node.getStorageID(); + if (!storage_id.hasDatabase()) + return false; + if (storage_id.database_name == DatabaseCatalog::SYSTEM_DATABASE) + { + static const boost::container::flat_set tables_ignoring_quota{"quotas", "quota_limits", "quota_usage", "quotas_usage", "one"}; + if (tables_ignoring_quota.count(storage_id.table_name)) + return true; + } + return false; +} + NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) { /** We need to read at least one column to find the number of rows. @@ -172,7 +186,9 @@ NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage bool applyTrivialCountIfPossible( QueryPlan & query_plan, - const TableNode & table_node, + SelectQueryInfo & select_query_info, + const TableNode * table_node, + const TableFunctionNode * table_function_node, const QueryTreeNodePtr & query_tree, ContextMutablePtr & query_context, const Names & columns_names) @@ -181,7 +197,10 @@ bool applyTrivialCountIfPossible( if (!settings.optimize_trivial_count_query) return false; - const auto & storage = table_node.getStorage(); + const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); + if (!storage->supportsTrivialCountOptimization()) + return false; + auto storage_id = storage->getStorageID(); auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(), storage_id.getTableName(), @@ -189,6 +208,9 @@ bool applyTrivialCountIfPossible( if (row_policy_filter) return {}; + if (select_query_info.additional_filter_ast) + return false; + /** Transaction check here is necessary because * MergeTree maintains total count for all parts in Active state and it simply returns that number for trivial select count() from table query. 
* But if we have current transaction, then we should return number of rows in current snapshot (that may include parts in Outdated state), @@ -199,9 +221,13 @@ bool applyTrivialCountIfPossible( return false; /// can't apply if FINAL - if (table_node.getTableExpressionModifiers().has_value() && - (table_node.getTableExpressionModifiers()->hasFinal() || table_node.getTableExpressionModifiers()->hasSampleSizeRatio() || - table_node.getTableExpressionModifiers()->hasSampleOffsetRatio())) + if (table_node && table_node->getTableExpressionModifiers().has_value() && + (table_node->getTableExpressionModifiers()->hasFinal() || table_node->getTableExpressionModifiers()->hasSampleSizeRatio() || + table_node->getTableExpressionModifiers()->hasSampleOffsetRatio())) + return false; + else if (table_function_node && table_function_node->getTableExpressionModifiers().has_value() && + (table_function_node->getTableExpressionModifiers()->hasFinal() || table_function_node->getTableExpressionModifiers()->hasSampleSizeRatio() || + table_function_node->getTableExpressionModifiers()->hasSampleOffsetRatio())) return false; // TODO: It's possible to optimize count() given only partition predicates @@ -226,6 +252,11 @@ bool applyTrivialCountIfPossible( if (!count_func) return false; + /// Some storages can optimize trivial count in read() method instead of totalRows() because it still can + /// require reading some data (but much faster than reading columns). + /// Set a special flag in query info so the storage will see it and optimize count in read() method. + select_query_info.optimize_trivial_count = true; + /// Get number of rows std::optional num_rows = storage->totalRows(settings); if (!num_rows) @@ -258,7 +289,8 @@ bool applyTrivialCountIfPossible( DataTypes argument_types; argument_types.reserve(columns_names.size()); { - const Block source_header = table_node.getStorageSnapshot()->getSampleBlockForColumns(columns_names); + const Block source_header = table_node ? 
table_node->getStorageSnapshot()->getSampleBlockForColumns(columns_names) + : table_function_node->getStorageSnapshot()->getSampleBlockForColumns(columns_names); for (const auto & column_name : columns_names) argument_types.push_back(source_header.getByName(column_name).type); } @@ -632,9 +664,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres /// Apply trivial_count optimization if possible bool is_trivial_count_applied = !select_query_options.only_analyze && is_single_table_expression && - table_node && + (table_node || table_function_node) && select_query_info.has_aggregates && - applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getMutableQueryContext(), table_expression_data.getColumnNames()); + settings.additional_table_filters.value.empty() && + applyTrivialCountIfPossible(query_plan, table_expression_query_info, table_node, table_function_node, select_query_info.query_tree, planner_context->getMutableQueryContext(), table_expression_data.getColumnNames()); if (is_trivial_count_applied) { @@ -761,6 +794,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres */ if (!query_plan.getMaxThreads() || is_remote) query_plan.setMaxThreads(max_threads_execute_query); + + query_plan.setConcurrencyControl(settings.use_concurrency_control); } else { @@ -825,8 +860,9 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } else { + SelectQueryOptions analyze_query_options = SelectQueryOptions(from_stage).analyze(); Planner planner(select_query_info.query_tree, - SelectQueryOptions(from_stage).analyze(), + analyze_query_options, select_query_info.planner_context); planner.buildQueryPlanIfNeeded(); @@ -1372,7 +1408,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, + SelectQueryOptions & select_query_options, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { @@ -1383,6 +1419,16 @@ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, std::vector table_expressions_outer_scope_columns(table_expressions_stack_size); ColumnIdentifierSet current_outer_scope_columns = outer_scope_columns; + if (is_single_table_expression) + { + auto * table_node = table_expressions_stack[0]->as(); + if (table_node && shouldIgnoreQuotaAndLimits(*table_node)) + { + select_query_options.ignore_quota = true; + select_query_options.ignore_limits = true; + } + } + /// For each table, table function, query, union table expressions prepare before query plan build for (size_t i = 0; i < table_expressions_stack_size; ++i) { diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index acbc96ddae0..9d3b98175d0 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -20,7 +20,7 @@ struct JoinTreeQueryPlan /// Build JOIN TREE query plan for query node JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, + SelectQueryOptions & select_query_options, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e495b0967e9..e87930a4d6b 100644 --- 
a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -635,6 +635,7 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo /// JOIN with JOIN engine. if (auto storage = table_join->getStorageJoin()) { + Names required_column_names; for (const auto & result_column : right_table_expression_header) { const auto * source_column_name = right_table_expression_data.getColumnNameOrNull(result_column.name); @@ -644,8 +645,9 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo fmt::join(storage->getKeyNames(), ", "), result_column.name); table_join->setRename(*source_column_name, result_column.name); + required_column_names.push_back(*source_column_name); } - return storage->getJoinLocked(table_join, planner_context->getQueryContext()); + return storage->getJoinLocked(table_join, planner_context->getQueryContext(), required_column_names); } /** JOIN with constant. diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 9ec5bb7adde..cd442085eca 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -13,7 +14,8 @@ namespace ErrorCodes extern const int POSITION_OUT_OF_BOUND; } -Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_) +Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) + : columns(std::move(columns_)), num_rows(num_rows_) { checkNumRowsIsConsistent(); } @@ -221,4 +223,16 @@ void convertToFullIfSparse(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } +Chunk cloneConstWithDefault(const Chunk & chunk, size_t num_rows) +{ + auto columns = chunk.cloneEmptyColumns(); + for (auto & column : columns) + { + column->insertDefault(); + column = ColumnConst::create(std::move(column), num_rows); + } + + return Chunk(std::move(columns), num_rows); +} + } diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index f50e45db644..9a7d6bc294d 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -156,4 +156,7 @@ private: void convertToFullIfConst(Chunk & chunk); void convertToFullIfSparse(Chunk & chunk); +/// Creates a chunk with the same columns but makes them constants with a default value and a specified number of rows. 
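As an aside on the helper declared just below: a minimal sketch of the same idea using plain standard-library types (the struct and function names here are hypothetical, not the ClickHouse Chunk/ColumnConst API) — the column layout is preserved, but every column becomes its default value repeated num_rows times, which is all a count-only read needs to carry:

```
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

/// Hypothetical miniature of a constant column: a name, a default value and a row count.
struct ConstColumnSketch
{
    std::string name;
    std::string default_value;
    std::size_t num_rows = 0;
};

/// Keep the same columns, but represent each one as its default value repeated num_rows times.
static std::vector<ConstColumnSketch> cloneConstWithDefaultSketch(const std::vector<std::string> & column_names, std::size_t num_rows)
{
    std::vector<ConstColumnSketch> columns;
    for (const auto & name : column_names)
        columns.push_back({name, "", num_rows});
    return columns;
}

int main()
{
    /// A count-only read of 42 rows over columns (id, message) carries no real data,
    /// only the row count attached to constant default columns.
    for (const auto & column : cloneConstWithDefaultSketch({"id", "message"}, 42))
        std::cout << column.name << " -> '' x " << column.num_rows << '\n';
}
```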
+Chunk cloneConstWithDefault(const Chunk & chunk, size_t num_rows); + } diff --git a/src/Processors/DelayedPortsProcessor.cpp b/src/Processors/DelayedPortsProcessor.cpp index 24023529bca..f3edc91b162 100644 --- a/src/Processors/DelayedPortsProcessor.cpp +++ b/src/Processors/DelayedPortsProcessor.cpp @@ -75,10 +75,13 @@ void DelayedPortsProcessor::finishPair(PortsPair & pair) pair.input_port->close(); pair.is_finished = true; - ++num_finished_pairs; + ++num_finished_inputs; if (pair.output_port) ++num_finished_outputs; + + if (!pair.is_delayed) + ++num_finished_main_inputs; } } @@ -112,9 +115,15 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair) return true; } + +bool DelayedPortsProcessor::shouldSkipDelayed() const +{ + return num_finished_main_inputs + num_delayed_ports < port_pairs.size(); +} + IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) { - bool skip_delayed = (num_finished_pairs + num_delayed_ports) < port_pairs.size(); + bool skip_delayed = shouldSkipDelayed(); bool need_data = false; if (!are_inputs_initialized && !updated_outputs.empty()) @@ -154,14 +163,14 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in } /// In case if main streams are finished at current iteration, start processing delayed streams. - if (skip_delayed && (num_finished_pairs + num_delayed_ports) >= port_pairs.size()) + if (skip_delayed && !shouldSkipDelayed()) { for (auto & pair : port_pairs) if (pair.is_delayed) need_data = processPair(pair) || need_data; } - if (num_finished_pairs == port_pairs.size()) + if (num_finished_inputs == port_pairs.size()) return Status::Finished; if (need_data) diff --git a/src/Processors/DelayedPortsProcessor.h b/src/Processors/DelayedPortsProcessor.h index 3909d533914..667667bbb91 100644 --- a/src/Processors/DelayedPortsProcessor.h +++ b/src/Processors/DelayedPortsProcessor.h @@ -29,14 +29,16 @@ private: std::vector port_pairs; const size_t num_delayed_ports; - size_t num_finished_pairs = 0; + size_t num_finished_inputs = 0; size_t num_finished_outputs = 0; + size_t num_finished_main_inputs = 0; std::vector output_to_pair; bool are_inputs_initialized = false; bool processPair(PortsPair & pair); void finishPair(PortsPair & pair); + bool shouldSkipDelayed() const; }; } diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index b0f842dec1b..c30586e194e 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -32,7 +32,8 @@ struct CompletedPipelineExecutor::Data } }; -static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + CompletedPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -45,7 +46,7 @@ static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupPt if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) 
{ @@ -74,14 +75,18 @@ void CompletedPipelineExecutor::execute() if (interactive_timeout_ms) { data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); /// Avoid passing this to lambda, copy ptr to data instead. /// Destructor of unique_ptr copy raw ptr into local variable first, only then calls object destructor. - auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()] + auto func = [ + data_ptr = data.get(), + num_threads = pipeline.getNumThreads(), + thread_group = CurrentThread::getGroup(), + concurrency_control = pipeline.getConcurrencyControl()] { - threadFunction(*data_ptr, thread_group, num_threads); + threadFunction(*data_ptr, thread_group, num_threads, concurrency_control); }; data->thread = ThreadFromGlobalPool(std::move(func)); @@ -100,9 +105,9 @@ void CompletedPipelineExecutor::execute() } else { - PipelineExecutor executor(pipeline.processors, pipeline.process_list_element); + PipelineExecutor executor(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); executor.setReadProgressCallback(pipeline.getReadProgressCallback()); - executor.execute(pipeline.getNumThreads()); + executor.execute(pipeline.getNumThreads(), pipeline.getConcurrencyControl()); } } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 27f6a454b24..6a946b4a4b9 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -260,7 +260,6 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue { pid = updated_processors.top(); updated_processors.pop(); - /// In this method we have ownership on node. auto & node = *nodes[pid]; diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 794f478b272..0fa7e0b552f 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_ if (read_progress->counters.total_rows_approx) read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx); + if (read_progress->counters.total_bytes) + read_progress_callback->addTotalBytes(read_progress->counters.total_bytes); + if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits)) node->processor->cancel(); } diff --git a/src/Processors/Executors/ExecutionThreadContext.h b/src/Processors/Executors/ExecutionThreadContext.h index eb048f8ab09..85788a70771 100644 --- a/src/Processors/Executors/ExecutionThreadContext.h +++ b/src/Processors/Executors/ExecutionThreadContext.h @@ -30,6 +30,12 @@ private: /// Callback for read progress. ReadProgressCallback * read_progress_callback = nullptr; + /// Timer that stops optimization of running local tasks instead of queuing them. + /// It provides local progress for each IProcessor task, allowing the partial result of the request to always be sent to the user.
+ Stopwatch watch; + /// Time period that limits the maximum allowed duration for optimizing the scheduling of local tasks within the executor + const UInt64 partial_result_duration_ms; + public: #ifndef NDEBUG /// Time for different processing stages. @@ -62,8 +68,13 @@ public: void setException(std::exception_ptr exception_) { exception = exception_; } void rethrowExceptionIfHas(); - explicit ExecutionThreadContext(size_t thread_number_, bool profile_processors_, bool trace_processors_, ReadProgressCallback * callback) + bool needWatchRestartForPartialResultProgress() { return partial_result_duration_ms != 0 && partial_result_duration_ms < watch.elapsedMilliseconds(); } + void restartWatch() { watch.restart(); } + + explicit ExecutionThreadContext(size_t thread_number_, bool profile_processors_, bool trace_processors_, ReadProgressCallback * callback, UInt64 partial_result_duration_ms_) : read_progress_callback(callback) + , watch(CLOCK_MONOTONIC) + , partial_result_duration_ms(partial_result_duration_ms_) , thread_number(thread_number_) , profile_processors(profile_processors_) , trace_processors(trace_processors_) diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index e61d225a968..08920592391 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -108,8 +108,15 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea { context.setTask(nullptr); - /// Take local task from queue if has one. - if (!queue.empty() && !context.hasAsyncTasks()) + /// If sending partial results is allowed and local tasks scheduling optimization is repeated longer than the limit + /// or new task need to send partial result later, skip optimization for this iteration. + /// Otherwise take local task from queue if has one. 
+ if ((!queue.empty() && queue.front()->processor->isPartialResultProcessor()) + || context.needWatchRestartForPartialResultProgress()) + { + context.restartWatch(); + } + else if (!queue.empty() && !context.hasAsyncTasks()) { context.setTask(queue.front()); queue.pop(); @@ -139,7 +146,7 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea } } -void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback) +void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms) { num_threads = num_threads_; use_threads = use_threads_; @@ -151,7 +158,7 @@ void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_ executor_contexts.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) - executor_contexts.emplace_back(std::make_unique(i, profile_processors, trace_processors, callback)); + executor_contexts.emplace_back(std::make_unique(i, profile_processors, trace_processors, callback, partial_result_duration_ms)); } } diff --git a/src/Processors/Executors/ExecutorTasks.h b/src/Processors/Executors/ExecutorTasks.h index d35f8de94d1..ab6d5e91411 100644 --- a/src/Processors/Executors/ExecutorTasks.h +++ b/src/Processors/Executors/ExecutorTasks.h @@ -58,7 +58,7 @@ public: void tryGetTask(ExecutionThreadContext & context); void pushTasks(Queue & queue, Queue & async_queue, ExecutionThreadContext & context); - void init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback); + void init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms); void fill(Queue & queue); void upscale(size_t use_threads_); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index f523e7b7cf9..77779e2cec2 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -3,16 +3,13 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include -#include #include #include @@ -36,8 +33,9 @@ namespace ErrorCodes } -PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem) +PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem, UInt64 partial_result_duration_ms_) : process_list_element(std::move(elem)) + , partial_result_duration_ms(partial_result_duration_ms_) { if (process_list_element) { @@ -99,7 +97,7 @@ void PipelineExecutor::finish() tasks.finish(); } -void PipelineExecutor::execute(size_t num_threads) +void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) { checkTimeLimit(); if (num_threads < 1) @@ -110,7 +108,7 @@ void PipelineExecutor::execute(size_t num_threads) try { - executeImpl(num_threads); + executeImpl(num_threads, concurrency_control); /// Execution can be stopped because of exception. Check and rethrow if any. 
for (auto & node : graph->nodes) @@ -137,12 +135,11 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { if (!is_execution_initialized) { - initializeExecution(1); + initializeExecution(1, true); // Acquire slot until we are done single_thread_slot = slots->tryAcquire(); - if (!single_thread_slot) - abort(); // Unable to allocate slot for the first thread, but we just allocated at least one slot + chassert(single_thread_slot && "Unable to allocate slot for the first thread, but we just allocated at least one slot"); if (yield_flag && *yield_flag) return true; @@ -210,6 +207,27 @@ void PipelineExecutor::finalizeExecution() all_processors_finished = false; break; } + else if (node->processor && read_progress_callback) + { + /// Some executors might have reported progress as part of their finish() call + /// For example, when reading from parallel replicas the coordinator will cancel the queries as soon as it + /// enough data (on LIMIT), but as the progress report is asynchronous it might not be reported until the + /// connection is cancelled and all packets drained + /// To cover these cases we check if there is any pending progress in the processors to report + if (auto read_progress = node->processor->getReadProgress()) + { + if (read_progress->counters.total_rows_approx) + read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx); + + if (read_progress->counters.total_bytes) + read_progress_callback->addTotalBytes(read_progress->counters.total_bytes); + + /// We are finalizing the execution, so no need to call onProgress if there is nothing to report + if (read_progress->counters.read_rows || read_progress->counters.read_bytes) + read_progress_callback->onProgress( + read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits); + } + } } if (!all_processors_finished) @@ -272,7 +290,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie /// Prepare processor after execution. if (!graph->updateNode(context.getProcessorID(), queue, async_queue)) - finish(); + cancel(); /// Push other tasks to global queue. tasks.pushTasks(queue, async_queue, context); @@ -297,19 +315,21 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie #endif } -void PipelineExecutor::initializeExecution(size_t num_threads) +void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_control) { is_execution_initialized = true; + size_t use_threads = num_threads; + /// Allocate CPU slots from concurrency control - constexpr size_t min_threads = 1; + size_t min_threads = concurrency_control ? 1uz : num_threads; slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - size_t use_threads = slots->grantedCount(); + use_threads = slots->grantedCount(); Queue queue; graph->initializeExecution(queue); - tasks.init(num_threads, use_threads, profile_processors, trace_processors, read_progress_callback.get()); + tasks.init(num_threads, use_threads, profile_processors, trace_processors, read_progress_callback.get(), partial_result_duration_ms); tasks.fill(queue); if (num_threads > 1) @@ -320,7 +340,7 @@ void PipelineExecutor::spawnThreads() { while (auto slot = slots->tryAcquire()) { - size_t thread_num = threads++; + size_t thread_num = threads.fetch_add(1); /// Count of threads in use should be updated for proper finish() condition. 
/// NOTE: this will not decrease `use_threads` below initially granted count @@ -352,9 +372,9 @@ void PipelineExecutor::spawnThreads() } } -void PipelineExecutor::executeImpl(size_t num_threads) +void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) { - initializeExecution(num_threads); + initializeExecution(num_threads, concurrency_control); bool finished_flag = false; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 1e7d52d8290..6cb0e6c4ac1 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -33,12 +33,12 @@ public: /// During pipeline execution new processors can appear. They will be added to existing set. /// /// Explicit graph representation is built in constructor. Throws if graph is not correct. - explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem); + explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem, UInt64 partial_result_duration_ms_ = 0); ~PipelineExecutor(); /// Execute pipeline in multiple threads. Must be called once. /// In case of exception during execution throws any occurred. - void execute(size_t num_threads); + void execute(size_t num_threads, bool concurrency_control); /// Execute single step. Step will be stopped when yield_flag is true. /// Execution is happened in a single thread. @@ -67,7 +67,7 @@ private: ExecutorTasks tasks; - // Concurrency control related + /// Concurrency control related ConcurrencyControl::AllocationPtr slots; ConcurrencyControl::SlotPtr single_thread_slot; // slot for single-thread mode to work using executeStep() std::unique_ptr pool; @@ -90,14 +90,17 @@ private: ReadProgressCallbackPtr read_progress_callback; + /// Duration between sending partial result through the pipeline + const UInt64 partial_result_duration_ms; + using Queue = std::queue; - void initializeExecution(size_t num_threads); /// Initialize executor contexts and task_queue. + void initializeExecution(size_t num_threads, bool concurrency_control); /// Initialize executor contexts and task_queue. void finalizeExecution(); /// Check all processors are finished. void spawnThreads(); /// Methods connected to execution. 
- void executeImpl(size_t num_threads); + void executeImpl(size_t num_threads, bool concurrency_control); void executeStepImpl(size_t thread_num, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num); void finish(); diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp index 781a7736336..40f968621b1 100644 --- a/src/Processors/Executors/PollingQueue.cpp +++ b/src/Processors/Executors/PollingQueue.cpp @@ -74,7 +74,7 @@ PollingQueue::TaskData PollingQueue::wait(std::unique_lock & lock) epoll_event event; event.data.ptr = nullptr; - epoll.getManyReady(1, &event, true); + epoll.getManyReady(1, &event, -1); lock.lock(); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index b2608f665b7..95a2022bf93 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -41,12 +41,13 @@ struct PullingAsyncPipelineExecutor::Data } }; -PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) +PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool has_partial_result_setting) : pipeline(pipeline_) { if (!pipeline.pulling()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingAsyncPipelineExecutor must be pulling"); - lazy_format = std::make_shared(pipeline.output->getHeader()); + lazy_format = std::make_shared(pipeline.output->getHeader(), /*is_partial_result_protocol_active*/ has_partial_result_setting); + pipeline.complete(lazy_format); } @@ -67,7 +68,8 @@ const Block & PullingAsyncPipelineExecutor::getHeader() const return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); } -static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -80,7 +82,7 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) 
{ @@ -102,13 +104,13 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) if (!data) { data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); data->lazy_format = lazy_format.get(); auto func = [&, thread_group = CurrentThread::getGroup()]() { - threadFunction(*data, thread_group, pipeline.getNumThreads()); + threadFunction(*data, thread_group, pipeline.getNumThreads(), pipeline.getConcurrencyControl()); }; data->thread = ThreadFromGlobalPool(std::move(func)); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.h b/src/Processors/Executors/PullingAsyncPipelineExecutor.h index 361bcc0155c..202ecbf281b 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.h @@ -21,7 +21,7 @@ struct ProfileInfo; class PullingAsyncPipelineExecutor { public: - explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_); + explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool has_partial_result_setting = false); ~PullingAsyncPipelineExecutor(); /// Get structure of returned block or chunk. diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..f79f15c19bf 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -44,7 +44,7 @@ bool PullingPipelineExecutor::pull(Chunk & chunk) { if (!executor) { - executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); executor->setReadProgressCallback(pipeline.getReadProgressCallback()); } diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 59d33cbffed..f3ed24e7e96 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -98,7 +98,8 @@ struct PushingAsyncPipelineExecutor::Data } }; -static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -111,7 +112,7 @@ static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) 
{ @@ -166,13 +167,13 @@ void PushingAsyncPipelineExecutor::start() started = true; data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); data->source = pushing_source.get(); auto func = [&, thread_group = CurrentThread::getGroup()]() { - threadFunction(*data, thread_group, pipeline.getNumThreads()); + threadFunction(*data, thread_group, pipeline.getNumThreads(), pipeline.getConcurrencyControl()); }; data->thread = ThreadFromGlobalPool(std::move(func)); diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index 696932932df..f2b018792c7 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -87,7 +87,7 @@ void PushingPipelineExecutor::start() return; started = true; - executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); executor->setReadProgressCallback(pipeline.getReadProgressCallback()); if (!executor->executeStep(&input_wait_flag)) diff --git a/src/Processors/Executors/StreamingFormatExecutor.cpp b/src/Processors/Executors/StreamingFormatExecutor.cpp index 2223721439e..46818989032 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.cpp +++ b/src/Processors/Executors/StreamingFormatExecutor.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { diff --git a/src/Processors/Formats/IInputFormat.cpp b/src/Processors/Formats/IInputFormat.cpp index 20f87d60e88..a87db5a0d4d 100644 --- a/src/Processors/Formats/IInputFormat.cpp +++ b/src/Processors/Formats/IInputFormat.cpp @@ -28,4 +28,10 @@ void IInputFormat::setReadBuffer(ReadBuffer & in_) in = &in_; } +Chunk IInputFormat::getChunkForCount(size_t rows) +{ + const auto & header = getPort().getHeader(); + return cloneConstWithDefault(Chunk{header.getColumns(), 0}, rows); +} + } diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h index 86f892b630d..c7b8d97d145 100644 --- a/src/Processors/Formats/IInputFormat.h +++ b/src/Processors/Formats/IInputFormat.h @@ -10,6 +10,8 @@ namespace DB { +struct SelectQueryInfo; + using ColumnMappingPtr = std::shared_ptr; /** Input format is a source, that reads data from ReadBuffer. @@ -21,9 +23,13 @@ protected: ReadBuffer * in [[maybe_unused]] = nullptr; public: - // ReadBuffer can be nullptr for random-access formats. + /// ReadBuffer can be nullptr for random-access formats. IInputFormat(Block header, ReadBuffer * in_); + /// If the format is used by a SELECT query, this method may be called. + /// The format may use it for filter pushdown. + virtual void setQueryInfo(const SelectQueryInfo &, ContextPtr) {} + /** In some usecase (hello Kafka) we need to read a lot of tiny streams in exactly the same format. 
* The recreating of parser for each small stream takes too long, so we introduce a method * resetParser() which allow to reset the state of parser to continue reading of @@ -55,11 +61,17 @@ public: virtual size_t getApproxBytesReadForChunk() const { return 0; } + void needOnlyCount() { need_only_count = true; } + protected: + virtual Chunk getChunkForCount(size_t rows); + ColumnMappingPtr column_mapping{}; InputFormatErrorsLoggerPtr errors_logger; + bool need_only_count = false; + private: /// Number of currently parsed chunk (if parallel parsing is enabled) size_t current_unit_number = 0; diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 88a6fb1e92f..e691e32a7bc 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -1,40 +1,89 @@ #include #include +#include namespace DB { -IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_) - : IProcessor({header_, header_, header_}, {}), out(out_) +IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_, bool is_partial_result_protocol_active_) + : IProcessor({header_, header_, header_, header_}, {}) + , out(out_) + , is_partial_result_protocol_active(is_partial_result_protocol_active_) { } +void IOutputFormat::setCurrentChunk(InputPort & input, PortKind kind) +{ + current_chunk = input.pull(true); + current_block_kind = kind; + has_input = true; +} + +IOutputFormat::Status IOutputFormat::prepareMainAndPartialResult() +{ + bool need_data = false; + for (auto kind : {Main, PartialResult}) + { + auto & input = getPort(kind); + + if (input.isFinished()) + continue; + + if (kind == PartialResult && main_input_activated) + { + input.close(); + continue; + } + + input.setNeeded(); + need_data = true; + + if (!input.hasData()) + continue; + + setCurrentChunk(input, kind); + return Status::Ready; + } + + if (need_data) + return Status::NeedData; + + return Status::Finished; +} + +IOutputFormat::Status IOutputFormat::prepareTotalsAndExtremes() +{ + for (auto kind : {Totals, Extremes}) + { + auto & input = getPort(kind); + + if (!input.isConnected() || input.isFinished()) + continue; + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + setCurrentChunk(input, kind); + return Status::Ready; + } + + return Status::Finished; +} + IOutputFormat::Status IOutputFormat::prepare() { if (has_input) return Status::Ready; - for (auto kind : {Main, Totals, Extremes}) - { - auto & input = getPort(kind); + auto status = prepareMainAndPartialResult(); + if (status != Status::Finished) + return status; - if (kind != Main && !input.isConnected()) - continue; - - if (input.isFinished()) - continue; - - input.setNeeded(); - - if (!input.hasData()) - return Status::NeedData; - - current_chunk = input.pull(true); - current_block_kind = kind; - has_input = true; - return Status::Ready; - } + status = prepareTotalsAndExtremes(); + if (status != Status::Finished) + return status; finished = true; @@ -83,8 +132,18 @@ void IOutputFormat::work() case Main: result_rows += current_chunk.getNumRows(); result_bytes += current_chunk.allocatedBytes(); + if (is_partial_result_protocol_active && !main_input_activated && current_chunk.hasRows()) + { + /// Sending an empty block signals to the client that partial results are terminated, + /// and only data from the main pipeline will be forwarded. 
+ consume(Chunk(current_chunk.cloneEmptyColumns(), 0)); + main_input_activated = true; + } consume(std::move(current_chunk)); break; + case PartialResult: + consumePartialResult(std::move(current_chunk)); + break; case Totals: writeSuffixIfNeeded(); if (auto totals = prepareTotals(std::move(current_chunk))) @@ -119,6 +178,15 @@ void IOutputFormat::write(const Block & block) flush(); } +void IOutputFormat::writePartialResult(const Block & block) +{ + writePrefixIfNeeded(); + consumePartialResult(Chunk(block.getColumns(), block.rows())); + + if (auto_flush) + flush(); +} + void IOutputFormat::finalize() { if (finalized) diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 58700a978ff..470d24e9a22 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -23,9 +23,9 @@ class WriteBuffer; class IOutputFormat : public IProcessor { public: - enum PortKind { Main = 0, Totals = 1, Extremes = 2 }; + enum PortKind { Main = 0, Totals = 1, Extremes = 2, PartialResult = 3 }; - IOutputFormat(const Block & header_, WriteBuffer & out_); + IOutputFormat(const Block & header_, WriteBuffer & out_, bool is_partial_result_protocol_active_ = false); Status prepare() override; void work() override; @@ -54,6 +54,7 @@ public: /// TODO: separate formats and processors. void write(const Block & block); + void writePartialResult(const Block & block); void finalize(); @@ -118,6 +119,7 @@ protected: virtual void consume(Chunk) = 0; virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} + virtual void consumePartialResult(Chunk) {} virtual void finalizeImpl() {} virtual void finalizeBuffers() {} virtual void writePrefix() {} @@ -166,6 +168,7 @@ protected: Chunk current_chunk; PortKind current_block_kind = PortKind::Main; + bool main_input_activated = false; bool has_input = false; bool finished = false; bool finalized = false; @@ -180,9 +183,15 @@ protected: Statistics statistics; private: + void setCurrentChunk(InputPort & input, PortKind kind); + IOutputFormat::Status prepareMainAndPartialResult(); + IOutputFormat::Status prepareTotalsAndExtremes(); + size_t rows_read_before = 0; bool are_totals_written = false; + bool is_partial_result_protocol_active = false; + /// Counters for consumed chunks. Are used for QueryLog. 
size_t result_rows = 0; size_t result_bytes = 0; diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 0728aecf61f..2ce1e48b2c5 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -86,7 +86,21 @@ void IRowInputFormat::logError() Chunk IRowInputFormat::generate() { if (total_rows == 0) - readPrefix(); + { + try + { + readPrefix(); + } + catch (Exception & e) + { + auto file_name = getFileNameFromReadBuffer(getReadBuffer()); + if (!file_name.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_name)); + + e.addMessage("(while reading header)"); + throw; + } + } const Block & header = getPort().getHeader(); @@ -97,9 +111,21 @@ Chunk IRowInputFormat::generate() size_t num_rows = 0; size_t chunk_start_offset = getDataOffsetMaybeCompressed(getReadBuffer()); - try { + if (need_only_count && supportsCountRows()) + { + num_rows = countRows(params.max_block_size); + if (num_rows == 0) + { + readSuffix(); + return {}; + } + total_rows += num_rows; + approx_bytes_read_for_chunk = getDataOffsetMaybeCompressed(getReadBuffer()) - chunk_start_offset; + return getChunkForCount(num_rows); + } + RowReadExtension info; bool continue_reading = true; for (size_t rows = 0; rows < params.max_block_size && continue_reading; ++rows) @@ -249,7 +275,7 @@ Chunk IRowInputFormat::generate() void IRowInputFormat::syncAfterError() { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method syncAfterError is not implemented for input format"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method syncAfterError is not implemented for input format {}", getName()); } void IRowInputFormat::resetParser() @@ -260,5 +286,10 @@ void IRowInputFormat::resetParser() block_missing_values.clear(); } +size_t IRowInputFormat::countRows(size_t) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method countRows is not implemented for input format {}", getName()); +} + } diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index b7b1b0b29a6..1b48647a224 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -52,6 +52,13 @@ protected: */ virtual bool readRow(MutableColumns & columns, RowReadExtension & extra) = 0; + /// Count some rows. Called in a loop until it returns 0, and the return values are added up. + /// `max_block_size` is the recommended number of rows after which to stop, if the implementation + /// involves scanning the data. If the implementation just takes the count from metadata, + /// `max_block_size` can be ignored. + virtual size_t countRows(size_t max_block_size); + virtual bool supportsCountRows() const { return false; } + virtual void readPrefix() {} /// delimiter before begin of result virtual void readSuffix() {} /// delimiter after end of result @@ -85,7 +92,7 @@ private: size_t num_errors = 0; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 1fa520eaaee..15b53c2a499 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -115,21 +115,24 @@ NamesAndTypesList IRowSchemaReader::readSchema() "Cannot read rows to determine the schema, the maximum number of rows (or bytes) to read is set to 0. 
" "Most likely setting input_format_max_rows_to_read_for_schema_inference or input_format_max_bytes_to_read_for_schema_inference is set to 0"); - DataTypes data_types = readRowAndGetDataTypes(); + auto data_types_maybe = readRowAndGetDataTypes(); /// Check that we read at list one column. - if (data_types.empty()) + if (!data_types_maybe) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data"); + DataTypes data_types = std::move(*data_types_maybe); + /// If column names weren't set, use default names 'c1', 'c2', ... - if (column_names.empty()) + bool use_default_column_names = column_names.empty(); + if (use_default_column_names) { column_names.reserve(data_types.size()); for (size_t i = 0; i != data_types.size(); ++i) column_names.push_back("c" + std::to_string(i + 1)); } /// If column names were set, check that the number of names match the number of types. - else if (column_names.size() != data_types.size()) + else if (column_names.size() != data_types.size() && !allowVariableNumberOfColumns()) { throw Exception( ErrorCodes::INCORRECT_DATA, @@ -137,6 +140,9 @@ NamesAndTypesList IRowSchemaReader::readSchema() } else { + if (column_names.size() != data_types.size()) + data_types.resize(column_names.size()); + std::unordered_set names_set; for (const auto & name : column_names) { @@ -155,13 +161,39 @@ NamesAndTypesList IRowSchemaReader::readSchema() for (rows_read = 1; rows_read < max_rows_to_read && in.count() < max_bytes_to_read; ++rows_read) { - DataTypes new_data_types = readRowAndGetDataTypes(); - if (new_data_types.empty()) + auto new_data_types_maybe = readRowAndGetDataTypes(); + if (!new_data_types_maybe) /// We reached eof. break; + DataTypes new_data_types = std::move(*new_data_types_maybe); + if (new_data_types.size() != data_types.size()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); + { + if (!allowVariableNumberOfColumns()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); + + if (use_default_column_names) + { + /// Current row contains new columns, add new default names. + if (new_data_types.size() > data_types.size()) + { + for (size_t i = data_types.size(); i < new_data_types.size(); ++i) + column_names.push_back("c" + std::to_string(i + 1)); + data_types.resize(new_data_types.size()); + } + /// Current row contain less columns than previous rows. + else + { + new_data_types.resize(data_types.size()); + } + } + /// If names were explicitly set, ignore all extra columns. + else + { + new_data_types.resize(column_names.size()); + } + } for (field_index = 0; field_index != data_types.size(); ++field_index) { diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 40702198a57..e6402ac0249 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -25,6 +25,10 @@ public: virtual NamesAndTypesList readSchema() = 0; + /// Some formats like Parquet contains number of rows in metadata + /// and we can read it once during schema inference and reuse it later for fast count; + virtual std::optional readNumberOrRows() { return std::nullopt; } + /// True if order of columns is important in format. /// Exceptions: JSON, TSKV. virtual bool hasStrictOrderOfColumns() const { return true; } @@ -93,11 +97,13 @@ protected: /// Read one row and determine types of columns in it. /// Return types in the same order in which the values were in the row. 
/// If it's impossible to determine the type for some column, return nullptr for it. - /// Return empty list if can't read more data. - virtual DataTypes readRowAndGetDataTypes() = 0; + /// Return std::nullopt if can't read more data. + virtual std::optional readRowAndGetDataTypes() = 0; void setColumnNames(const std::vector & names) { column_names = names; } + virtual bool allowVariableNumberOfColumns() const { return false; } + size_t field_index; private: diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 2fadc09e80f..f92406488d8 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -45,6 +45,9 @@ Chunk ArrowBlockInputFormat::generate() batch_result = stream_reader->Next(); if (batch_result.ok() && !(*batch_result)) return res; + + if (need_only_count && batch_result.ok()) + return getChunkForCount((*batch_result)->num_rows()); } else { @@ -57,6 +60,15 @@ Chunk ArrowBlockInputFormat::generate() if (record_batch_current >= record_batch_total) return res; + if (need_only_count) + { + auto rows = file_reader->RecordBatchCountRows(record_batch_current++); + if (!rows.ok()) + throw ParsingException( + ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of Arrow data: {}", rows.status().ToString()); + return getChunkForCount(*rows); + } + batch_result = file_reader->ReadRecordBatch(record_batch_current); } @@ -143,7 +155,6 @@ void ArrowBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Arrow", - format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns, format_settings.null_as_default, format_settings.arrow.case_insensitive_column_matching); @@ -161,23 +172,49 @@ ArrowSchemaReader::ArrowSchemaReader(ReadBuffer & in_, bool stream_, const Forma { } -NamesAndTypesList ArrowSchemaReader::readSchema() +void ArrowSchemaReader::initializeIfNeeded() { - std::shared_ptr schema; + if (file_reader || stream_reader) + return; if (stream) - schema = createStreamReader(in)->schema(); + stream_reader = createStreamReader(in); else { std::atomic is_stopped = 0; - schema = createFileReader(in, format_settings, is_stopped)->schema(); + file_reader = createFileReader(in, format_settings, is_stopped); } +} + +NamesAndTypesList ArrowSchemaReader::readSchema() +{ + initializeIfNeeded(); + + std::shared_ptr schema; + + if (stream) + schema = stream_reader->schema(); + else + schema = file_reader->schema(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( *schema, stream ? 
"ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); if (format_settings.schema_inference_make_columns_nullable) return getNamesAndRecursivelyNullableTypes(header); - return header.getNamesAndTypesList();} + return header.getNamesAndTypesList(); +} + +std::optional ArrowSchemaReader::readNumberOrRows() +{ + if (stream) + return std::nullopt; + + auto rows = file_reader->CountRows(); + if (!rows.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of Arrow data: {}", rows.status().ToString()); + + return *rows; +} void registerInputFormatArrow(FormatFactory & factory) { @@ -190,7 +227,6 @@ void registerInputFormatArrow(FormatFactory & factory) { return std::make_shared(buf, sample, false, format_settings); }); - factory.markFormatSupportsSubcolumns("Arrow"); factory.markFormatSupportsSubsetOfColumns("Arrow"); factory.registerInputFormat( "ArrowStream", diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index df77994c3d5..06a7b470312 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -50,7 +50,7 @@ private: int record_batch_current = 0; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; const FormatSettings format_settings; @@ -66,9 +66,15 @@ public: NamesAndTypesList readSchema() override; + std::optional readNumberOrRows() override; + private: + void initializeIfNeeded(); + bool stream; const FormatSettings format_settings; + std::shared_ptr stream_reader; + std::shared_ptr file_reader; }; } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 5a7306111a5..76f39b07a05 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -719,7 +719,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( /// ORC doesn't support Decimal256 as separate type. We read and write it as binary data. case TypeIndex::Decimal256: return readColumnWithBigNumberFromBinaryData>(arrow_column, column_name, type_hint); - default:; + default: + break; } } return readColumnWithStringData(arrow_column, column_name); @@ -738,7 +739,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( return readColumnWithBigIntegerFromFixedBinaryData(arrow_column, column_name, type_hint); case TypeIndex::UInt256: return readColumnWithBigIntegerFromFixedBinaryData(arrow_column, column_name, type_hint); - default:; + default: + break; } } @@ -1032,13 +1034,11 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( ArrowColumnToCHColumn::ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) : header(header_) , format_name(format_name_) - , import_nested(import_nested_) , allow_missing_columns(allow_missing_columns_) , null_as_default(null_as_default_) , case_insensitive_matching(case_insensitive_matching_) @@ -1080,42 +1080,40 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (!name_to_column_ptr.contains(search_column_name)) { bool read_from_nested = false; - /// Check if it's a column from nested table. - if (import_nested) + /// Check if it's a subcolumn from some struct. 
+ String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) { - String nested_table_name = Nested::extractTableName(header_column.name); - String search_nested_table_name = nested_table_name; - if (case_insensitive_matching) - boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + if (!nested_tables.contains(search_nested_table_name)) { - if (!nested_tables.contains(search_nested_table_name)) + NamesAndTypesList nested_columns; + for (const auto & name_and_type : header.getNamesAndTypesList()) { - NamesAndTypesList nested_columns; - for (const auto & name_and_type : header.getNamesAndTypesList()) - { - if (name_and_type.name.starts_with(nested_table_name + ".")) - nested_columns.push_back(name_and_type); - } - auto nested_table_type = Nested::collect(nested_columns).front().type; + if (name_and_type.name.starts_with(nested_table_name + ".")) + nested_columns.push_back(name_and_type); + } + auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( - arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; - BlockPtr block_ptr = std::make_shared(cols); - auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); - nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; - } - auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); - if (nested_column) - { - column = *nested_column; - if (case_insensitive_matching) - column.name = header_column.name; - read_from_nested = true; - } + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( + arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; } } + if (!read_from_nested) { if (!allow_missing_columns) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 64ff99c70ac..57f33069e0e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -24,7 +24,6 @@ public: ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false); @@ -53,7 +52,6 @@ public: private: const Block & header; const std::string format_name; - bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. 
bool allow_missing_columns; bool null_as_default; diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index b7adaa35335..676ce50d04f 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -35,7 +35,7 @@ public: /// - key: field name with full path. eg. a struct field's name is like a.x.i /// - value: a pair, first value refers to this field's start index, second value refers to how many /// indices this field take. eg. - /// For a parquet schema {x: int , y: {i: int, j: int}}, the return will be + /// For a parquet schema {x: int, y: {i: int, j: int}}, the return will be /// - x: (0, 1) /// - y: (1, 2) /// - y.i: (1, 1) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index a7efc823fbb..1046125c16c 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -369,14 +369,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro break; case avro::AVRO_UNION: { - if (root_node->leaves() == 2 + if (root_node->leaves() == 1) + { + auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type); + return [nested_deserialize](IColumn & column, avro::Decoder & decoder) + { + decoder.decodeUnionIndex(); + nested_deserialize(column, decoder); + return true; + }; + } + /// FIXME Support UNION has more than two datatypes. + else if ( + root_node->leaves() == 2 && (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL)) { int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0; if (target.isNullable()) { - auto nested_deserialize = this->createDeserializeFn( - root_node->leafAt(non_null_union_index), removeNullable(target_type)); + auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { ColumnNullable & col = assert_cast(column); @@ -395,7 +406,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro } else if (null_as_default) { - auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); + auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { int union_index = static_cast(decoder.decodeUnionIndex()); @@ -901,6 +912,19 @@ bool AvroRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &ext return false; } +size_t AvroRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + while (file_reader_ptr->hasMore() && num_rows < max_block_size) + { + file_reader_ptr->decr(); + file_reader_ptr->decoder().drain(); + ++num_rows; + } + + return num_rows; +} + class AvroConfluentRowInputFormat::SchemaRegistry { public: @@ -1000,7 +1024,7 @@ private: using ConfluentSchemaRegistry = AvroConfluentRowInputFormat::SchemaRegistry; #define SCHEMA_REGISTRY_CACHE_MAX_SIZE 1000 /// Cache of Schema Registry URL -> SchemaRegistry -static CacheBase schema_registry_cache(SCHEMA_REGISTRY_CACHE_MAX_SIZE); +static CacheBase schema_registry_cache(SCHEMA_REGISTRY_CACHE_MAX_SIZE); static std::shared_ptr getConfluentSchemaRegistry(const FormatSettings & 
format_settings) { @@ -1192,12 +1216,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_NULL: return std::make_shared(); case avro::Type::AVRO_UNION: - if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) + if (node->leaves() == 1) + { + return avroNodeToDataType(node->leafAt(0)); + } + else if ( + node->leaves() == 2 + && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) { int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index)); return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type; } + /// FIXME Support UNION has more than two datatypes. throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting."); case avro::Type::AVRO_SYMBOLIC: return avroNodeToDataType(avro::resolveSymbol(node)); @@ -1240,6 +1271,8 @@ void registerInputFormatAvro(FormatFactory & factory) { return std::make_shared(sample, buf, params, settings); }); + + factory.markFormatSupportsSubsetOfColumns("AvroConfluent"); } void registerAvroSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 341b430205f..a8364df6e80 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -160,6 +160,9 @@ private: bool readRow(MutableColumns & columns, RowReadExtension & ext) override; void readPrefix() override; + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + std::unique_ptr file_reader_ptr; std::unique_ptr deserializer_ptr; FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 57598fb507f..2972f9da743 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -372,6 +372,9 @@ void BSONEachRowRowInputFormat::readArray(IColumn & column, const DataTypePtr & size_t document_start = in->count(); BSONSizeT document_size; readBinary(document_size, *in); + if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size); + while (in->count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) { auto nested_bson_type = getBSONType(readBSONType(*in)); @@ -399,6 +402,9 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr & size_t document_start = in->count(); BSONSizeT document_size; readBinary(document_size, *in); + if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size); + while (in->count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) { auto nested_bson_type = getBSONType(readBSONType(*in)); @@ -457,6 +463,9 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da size_t document_start = in->count(); BSONSizeT document_size; readBinary(document_size, *in); + if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size); + while 
(in->count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) { auto nested_bson_type = getBSONType(readBSONType(*in)); @@ -696,6 +705,8 @@ static void skipBSONField(ReadBuffer & in, BSONType type) { BSONSizeT size; readBinary(size, in); + if (size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", size); in.ignore(size - sizeof(size)); break; } @@ -735,6 +746,8 @@ static void skipBSONField(ReadBuffer & in, BSONType type) { BSONSizeT size; readBinary(size, in); + if (size < sizeof(BSONSizeT)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid java code_w_scope size: {}", size); in.ignore(size - sizeof(size)); break; } @@ -775,6 +788,9 @@ bool BSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi current_document_start = in->count(); readBinary(current_document_size, *in); + if (current_document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", current_document_size); + while (in->count() - current_document_start + sizeof(BSON_DOCUMENT_END) != current_document_size) { auto type = getBSONType(readBSONType(*in)); @@ -822,6 +838,22 @@ void BSONEachRowRowInputFormat::resetParser() prev_positions.clear(); } +size_t BSONEachRowRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + BSONSizeT document_size; + while (!in->eof() && num_rows < max_block_size) + { + readBinary(document_size, *in); + if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size); + in->ignore(document_size - sizeof(BSONSizeT)); + ++num_rows; + } + + return num_rows; +} + BSONEachRowSchemaReader::BSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & settings_) : IRowWithNamesSchemaReader(in_, settings_) { @@ -865,7 +897,7 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo in.ignore(size); return std::make_shared(); } - case BSONType::OBJECT_ID:; + case BSONType::OBJECT_ID: { in.ignore(BSON_OBJECT_ID_SIZE); return makeNullable(std::make_shared(BSON_OBJECT_ID_SIZE)); diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h index 538a59e05c3..5e8bee50963 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h @@ -64,6 +64,9 @@ private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + size_t columnIndex(const StringRef & name, size_t key_index); using ColumnReader = std::function; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 899b84cc132..e2383d1bfab 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -233,6 +233,8 @@ namespace DB checkStatus(components_status, nested_column->getName(), format_name); /// Pass null null_map, because fillArrowArray will decide whether nested_type is nullable, if nullable, it will create a new null_map from nested_column + /// Note that it is only needed by gluten(https://github.com/oap-project/gluten), because array type in gluten is by default nullable. 
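For reference, a standalone sketch of the BSON framing that the `BSONEachRowRowInputFormat` changes above validate and that its `countRows` exploits: every document starts with a 4-byte little-endian size that covers the size field itself and the trailing `0x00` terminator, so anything below 5 bytes is invalid and a whole row can be skipped by ignoring `size - 4` more bytes. Not ClickHouse code; `countBSONDocuments` and the sample bytes are illustrative, and a little-endian host is assumed.

```
// Standalone sketch: count BSON documents in a memory buffer by reading the
// leading 4-byte little-endian size of each document and skipping the rest.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

static size_t countBSONDocuments(const std::vector<unsigned char> & data)
{
    size_t pos = 0;
    size_t num_rows = 0;
    while (pos < data.size())
    {
        if (data.size() - pos < sizeof(uint32_t))
            throw std::runtime_error("Truncated BSON document size");

        uint32_t document_size;
        std::memcpy(&document_size, data.data() + pos, sizeof(document_size)); // little-endian host assumed

        // Same sanity check as in the diff: a document cannot be smaller than
        // its own size field plus the mandatory 0x00 terminator.
        if (document_size < sizeof(uint32_t) + 1)
            throw std::runtime_error("Invalid document size: " + std::to_string(document_size));
        if (document_size > data.size() - pos)
            throw std::runtime_error("Document size exceeds remaining data");

        pos += document_size; // skip the whole document without parsing it
        ++num_rows;
    }
    return num_rows;
}

int main()
{
    // Two empty BSON documents: each is 5 bytes (4-byte size field + trailing 0x00).
    const std::vector<unsigned char> two_empty_documents = {5, 0, 0, 0, 0, 5, 0, 0, 0, 0};
    std::cout << countBSONDocuments(two_empty_documents) << " rows\n"; // prints "2 rows"
}
```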
+ /// And it does not influence the original ClickHouse logic, because null_map passed to fillArrowArrayWithArrayColumnData is always nullptr for ClickHouse doesn't allow nullable complex types including array type. fillArrowArray(column_name, nested_column, nested_type, nullptr, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, output_fixed_string_as_fixed_byte_array, dictionary_values); } } @@ -682,9 +684,6 @@ namespace DB bool output_fixed_string_as_fixed_byte_array, std::unordered_map & dictionary_values) { - const String column_type_name = column_type->getFamilyName(); - WhichDataType which(column_type); - switch (column_type->getTypeId()) { case TypeIndex::Nullable: @@ -794,7 +793,7 @@ namespace DB FOR_INTERNAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type_name, column_name, format_name); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getFamilyName(), column_name, format_name); } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 244b906549e..ddfda6cc54e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -114,6 +115,68 @@ void CSVRowInputFormat::resetParser() buf->reset(); } +void CSVFormatReader::skipRow() +{ + bool quotes = false; + ReadBuffer & istr = *buf; + + while (!istr.eof()) + { + if (quotes) + { + auto * pos = find_first_symbols<'"'>(istr.position(), istr.buffer().end()); + istr.position() = pos; + + if (pos > istr.buffer().end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); + else if (pos == istr.buffer().end()) + continue; + else if (*pos == '"') + { + ++istr.position(); + if (!istr.eof() && *istr.position() == '"') + ++istr.position(); + else + quotes = false; + } + } + else + { + auto * pos = find_first_symbols<'"', '\r', '\n'>(istr.position(), istr.buffer().end()); + istr.position() = pos; + + if (pos > istr.buffer().end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. 
There must be a bug."); + else if (pos == istr.buffer().end()) + continue; + + if (*pos == '"') + { + quotes = true; + ++istr.position(); + continue; + } + + if (*pos == '\n') + { + ++istr.position(); + if (!istr.eof() && *istr.position() == '\r') + ++istr.position(); + return; + } + else if (*pos == '\r') + { + ++istr.position(); + if (!istr.eof() && *pos == '\n') + { + ++pos; + return; + } + } + } + } +} + static void skipEndOfLine(ReadBuffer & in) { /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic) @@ -284,7 +347,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return true; } -bool CSVFormatReader::allowVariableNumberOfColumns() +bool CSVFormatReader::allowVariableNumberOfColumns() const { return format_settings.csv.allow_variable_number_of_columns; } @@ -410,19 +473,22 @@ CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_t { } -std::pair, DataTypes> CSVSchemaReader::readRowAndGetFieldsAndDataTypes() +std::optional, DataTypes>> CSVSchemaReader::readRowAndGetFieldsAndDataTypes() { if (buf.eof()) return {}; auto fields = reader.readRow(); auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV); - return {fields, data_types}; + return std::make_pair(std::move(fields), std::move(data_types)); } -DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl() +std::optional CSVSchemaReader::readRowAndGetDataTypesImpl() { - return std::move(readRowAndGetFieldsAndDataTypes().second); + auto fields_with_types = readRowAndGetFieldsAndDataTypes(); + if (!fields_with_types) + return {}; + return std::move(fields_with_types->second); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 7b1a1fc433d..f8b8bddce19 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -40,6 +40,8 @@ private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + bool supportsCountRows() const override { return true; } + protected: std::shared_ptr buf; }; @@ -59,6 +61,8 @@ public: bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; + void skipRow() override; + void skipField(size_t /*file_column*/) override { skipField(); } void skipField(); @@ -70,7 +74,7 @@ public: void skipPrefixBeforeHeader() override; bool checkForEndOfRow() override; - bool allowVariableNumberOfColumns() override; + bool allowVariableNumberOfColumns() const override; std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } @@ -102,8 +106,10 @@ public: CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypesImpl() override; - std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + bool allowVariableNumberOfColumns() const override { return format_settings.csv.allow_variable_number_of_columns; } + + std::optional readRowAndGetDataTypesImpl() override; + std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() override; PeekableReadBuffer buf; CSVFormatReader reader; diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index c056ee2b4a4..12bb5aef168 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp 
+++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -17,17 +17,17 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const CapnProtoSchemaInfo & info, const FormatSettings & format_settings) : IRowInputFormat(std::move(header_), in_, std::move(params_)) , parser(std::make_shared()) { // Parse the schema and fetch the root object - schema = parser->getMessageSchema(info); + schema = parser->getMessageSchema(info.getSchemaInfo()); const auto & header = getPort().getHeader(); serializer = std::make_unique(header.getDataTypes(), header.getNames(), schema, format_settings.capn_proto); } -kj::Array CapnProtoRowInputFormat::readMessage() +std::pair, size_t> CapnProtoRowInputFormat::readMessagePrefix() { uint32_t segment_count; in->readStrict(reinterpret_cast(&segment_count), sizeof(uint32_t)); @@ -48,6 +48,14 @@ kj::Array CapnProtoRowInputFormat::readMessage() for (size_t i = 0; i <= segment_count; ++i) in->readStrict(prefix_chars.begin() + ((i + 1) * sizeof(uint32_t)), sizeof(uint32_t)); + return {std::move(prefix), prefix_size}; +} + +kj::Array CapnProtoRowInputFormat::readMessage() +{ + auto [prefix, prefix_size] = readMessagePrefix(); + auto prefix_chars = prefix.asChars(); + // calculate size of message const auto expected_words = capnp::expectedSizeInWordsFromPrefix(prefix); const auto expected_bytes = expected_words * sizeof(capnp::word); @@ -62,6 +70,18 @@ kj::Array CapnProtoRowInputFormat::readMessage() return msg; } +void CapnProtoRowInputFormat::skipMessage() +{ + auto [prefix, prefix_size] = readMessagePrefix(); + + // calculate size of message + const auto expected_bytes = capnp::expectedSizeInWordsFromPrefix(prefix) * sizeof(capnp::word); + const auto data_size = expected_bytes - prefix_size; + + // skip full message + in->ignore(data_size); +} + bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { if (in->eof()) @@ -82,6 +102,18 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension return true; } +size_t CapnProtoRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + while (!in->eof() && num_rows < max_block_size) + { + skipMessage(); + ++num_rows; + } + + return num_rows; +} + CapnProtoSchemaReader::CapnProtoSchemaReader(const FormatSettings & format_settings_) : format_settings(format_settings_) { } @@ -106,8 +138,12 @@ void registerInputFormatCapnProto(FormatFactory & factory) "CapnProto", [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "CapnProto", true), settings); + return std::make_shared( + buf, + sample, + std::move(params), + CapnProtoSchemaInfo(settings, "CapnProto", sample, settings.capn_proto.use_autogenerated_schema), + settings); }); factory.markFormatSupportsSubsetOfColumns("CapnProto"); factory.registerFileExtension("capnp", "CapnProto"); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 06e94da123f..cb2d4090ff0 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -24,14 +24,19 @@ class ReadBuffer; 
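For reference, a standalone sketch of the stream framing that `readMessagePrefix()`/`skipMessage()` above rely on. In the standard Cap'n Proto serialization a message starts with a 4-byte little-endian value holding the segment count minus one, followed by one 4-byte size (in 8-byte words) per segment and padding to an 8-byte boundary, so the total message size can be computed from the header alone and whole messages can be skipped when only a row count is needed. This is not ClickHouse or Cap'n Proto library code; `capnpMessageSizeFromHeader` and the sample bytes are illustrative, and a little-endian host is assumed.

```
// Standalone sketch: compute the total on-the-wire size of a Cap'n Proto
// message from its stream framing header.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>

static uint64_t capnpMessageSizeFromHeader(const std::vector<unsigned char> & data)
{
    auto read_u32 = [&](size_t offset)
    {
        if (offset + 4 > data.size())
            throw std::runtime_error("Truncated Cap'n Proto header");
        uint32_t value;
        std::memcpy(&value, data.data() + offset, 4); // little-endian host assumed
        return value;
    };

    const uint64_t segment_count = uint64_t(read_u32(0)) + 1; // header stores count - 1

    uint64_t header_bytes = 4 * (segment_count + 1);
    header_bytes = (header_bytes + 7) / 8 * 8; // header is padded to an 8-byte word boundary

    uint64_t data_words = 0;
    for (uint64_t i = 0; i < segment_count; ++i)
        data_words += read_u32(4 * (i + 1)); // per-segment size in 8-byte words

    return header_bytes + data_words * 8;
}

int main()
{
    // One segment of 2 words: header {0, 2} -> 8 header bytes + 16 data bytes.
    std::vector<unsigned char> message = {0, 0, 0, 0, 2, 0, 0, 0};
    message.resize(message.size() + 16, 0);
    std::cout << capnpMessageSizeFromHeader(message) << " bytes\n"; // prints "24 bytes"
}
```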
class CapnProtoRowInputFormat final : public IRowInputFormat { public: - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const CapnProtoSchemaInfo & info, const FormatSettings & format_settings); String getName() const override { return "CapnProtoRowInputFormat"; } private: bool readRow(MutableColumns & columns, RowReadExtension &) override; + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + + std::pair, size_t> readMessagePrefix(); kj::Array readMessage(); + void skipMessage(); std::shared_ptr parser; capnp::StructSchema schema; diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 66a7160dd89..8ab19331b11 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -23,14 +23,14 @@ void CapnProtoOutputStream::write(const void * buffer, size_t size) CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & info, + const CapnProtoSchemaInfo & info, const FormatSettings & format_settings) : IRowOutputFormat(header_, out_) , column_names(header_.getNames()) , column_types(header_.getDataTypes()) , output_stream(std::make_unique(out_)) { - schema = schema_parser.getMessageSchema(info); + schema = schema_parser.getMessageSchema(info.getSchemaInfo()); const auto & header = getPort(PortKind::Main).getHeader(); serializer = std::make_unique(header.getDataTypes(), header.getNames(), schema, format_settings.capn_proto); capnp::MallocMessageBuilder message; @@ -52,7 +52,11 @@ void registerOutputFormatCapnProto(FormatFactory & factory) const Block & sample, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + return std::make_shared( + buf, + sample, + CapnProtoSchemaInfo(format_settings, "CapnProto", sample, format_settings.capn_proto.use_autogenerated_schema), + format_settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index dd9dcc6b340..c00dceb498e 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -31,8 +31,8 @@ public: CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & info, - const FormatSettings & format_settings_); + const CapnProtoSchemaInfo & info, + const FormatSettings & format_settings); String getName() const override { return "CapnProtoRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 06efe0a20aa..fe6fb42d0a0 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -402,11 +402,10 @@ size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTP /// Allows distinguish expression in the last column in Values format hash_state.update(salt); - IAST::Hash res128; - hash_state.get128(res128); + const auto res128 = getSipHash128AsPair(hash_state); size_t res = 0; - boost::hash_combine(res, res128.first); - 
boost::hash_combine(res, res128.second); + boost::hash_combine(res, res128.low64); + boost::hash_combine(res, res128.high64); return res; } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 1e67db79a2c..88510b96ae5 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -139,10 +139,13 @@ void CustomSeparatedFormatReader::skipRowBetweenDelimiter() void CustomSeparatedFormatReader::skipField() { skipSpaces(); - skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + if (format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + readCSVFieldWithTwoPossibleDelimiters(*buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + else + skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); } -bool CustomSeparatedFormatReader::checkEndOfRow() +bool CustomSeparatedFormatReader::checkForEndOfRow() { PeekableReadBufferCheckpoint checkpoint{*buf, true}; @@ -200,12 +203,12 @@ std::vector CustomSeparatedFormatReader::readRowImpl() std::vector values; skipRowStartDelimiter(); - if (columns == 0) + if (columns == 0 || allowVariableNumberOfColumns()) { do { values.push_back(readFieldIntoString(values.empty(), false, true)); - } while (!checkEndOfRow()); + } while (!checkForEndOfRow()); columns = values.size(); } else @@ -218,19 +221,34 @@ std::vector CustomSeparatedFormatReader::readRowImpl() return values; } -void CustomSeparatedFormatReader::skipHeaderRow() +void CustomSeparatedFormatReader::skipRow() { skipRowStartDelimiter(); - bool first = true; - do - { - if (!first) - skipFieldDelimiter(); - first = false; - skipField(); + /// If the number of columns in row is unknown, + /// we should check for end of row after each field. 
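For reference, a standalone sketch of that strategy: when the column count is unknown (or allowed to vary), a row is read field by field and the row terminator is checked after every field instead of assuming a fixed count. Not ClickHouse code; the `'\t'` field delimiter and `'\n'` row terminator are placeholders for the real `format_settings.custom.*` values.

```
// Standalone sketch: read one delimiter-separated row with an unknown number
// of columns by checking for the row terminator after every field.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> readRowWithUnknownColumnCount(std::istream & in)
{
    std::vector<std::string> fields(1);
    char c;
    while (in.get(c))
    {
        if (c == '\n')       // end of row: stop, however many fields were seen
            break;
        if (c == '\t')       // field delimiter: start the next field
            fields.emplace_back();
        else
            fields.back() += c;
    }
    return fields;
}

int main()
{
    std::istringstream data("a\tb\tc\n1\t2\n");
    while (data.peek() != std::char_traits<char>::eof())
    {
        auto row = readRowWithUnknownColumnCount(data);
        std::cout << row.size() << " fields\n"; // prints "3 fields" then "2 fields"
    }
}
```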
+ if (columns == 0 || allowVariableNumberOfColumns()) + { + bool first = true; + do + { + if (!first) + skipFieldDelimiter(); + first = false; + + skipField(); + } + while (!checkForEndOfRow()); + } + else + { + for (size_t i = 0; i != columns; ++i) + { + if (i != 0) + skipFieldDelimiter(); + skipField(); + } } - while (!checkEndOfRow()); skipRowEndDelimiter(); } @@ -369,7 +387,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( { } -std::pair, DataTypes> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() +std::optional, DataTypes>> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { if (no_more_data || reader.checkForSuffix()) { @@ -385,12 +403,15 @@ std::pair, DataTypes> CustomSeparatedSchemaReader::readRowAn auto fields = reader.readRow(); auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); - return {fields, data_types}; + return std::make_pair(std::move(fields), std::move(data_types)); } -DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl() +std::optional CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl() { - return readRowAndGetFieldsAndDataTypes().second; + auto fields_with_types = readRowAndGetFieldsAndDataTypes(); + if (!fields_with_types) + return {}; + return std::move(fields_with_types->second); } void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 2acf35bd143..5ad870199db 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -33,6 +33,8 @@ private: void syncAfterError() override; void readPrefix() override; + bool supportsCountRows() const override { return true; } + std::unique_ptr buf; bool ignore_spaces; }; @@ -48,9 +50,9 @@ public: void skipField(size_t /*file_column*/) override { skipField(); } void skipField(); - void skipNames() override { skipHeaderRow(); } - void skipTypes() override { skipHeaderRow(); } - void skipHeaderRow(); + void skipNames() override { skipRow(); } + void skipTypes() override { skipRow(); } + void skipRow() override; void skipPrefixBeforeHeader() override; void skipRowStartDelimiter() override; @@ -74,7 +76,9 @@ public: std::vector readRowForHeaderDetection() override { return readRowImpl(); } - bool checkEndOfRow(); + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } + bool checkForSuffixImpl(bool check_eof); inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } @@ -109,9 +113,11 @@ public: CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_); private: - DataTypes readRowAndGetDataTypesImpl() override; + bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } - std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + std::optional readRowAndGetDataTypesImpl() override; + + std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp 
b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 17bade02a58..f6bd4f51289 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -15,11 +15,11 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -JSONAsRowInputFormat::JSONAsRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) - : JSONAsRowInputFormat(header_, std::make_unique(in_), params_) {} +JSONAsRowInputFormat::JSONAsRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) + : JSONAsRowInputFormat(header_, std::make_unique(in_), params_, format_settings_) {} -JSONAsRowInputFormat::JSONAsRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_) : - IRowInputFormat(header_, *buf_, std::move(params_)), buf(std::move(buf_)) +JSONAsRowInputFormat::JSONAsRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_, const FormatSettings & format_settings_) : + JSONEachRowRowInputFormat(*buf_, header_, std::move(params_), format_settings_, false), buf(std::move(buf_)) { if (header_.columns() > 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -29,39 +29,10 @@ JSONAsRowInputFormat::JSONAsRowInputFormat(const Block & header_, std::unique_pt void JSONAsRowInputFormat::resetParser() { - IRowInputFormat::resetParser(); + JSONEachRowRowInputFormat::resetParser(); buf->reset(); } -void JSONAsRowInputFormat::readPrefix() -{ - /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. - skipBOMIfExists(*buf); - - skipWhitespaceIfAny(*buf); - if (!buf->eof() && *buf->position() == '[') - { - ++buf->position(); - data_in_square_brackets = true; - } -} - -void JSONAsRowInputFormat::readSuffix() -{ - skipWhitespaceIfAny(*buf); - if (data_in_square_brackets) - { - assertChar(']', *buf); - skipWhitespaceIfAny(*buf); - data_in_square_brackets = false; - } - if (!buf->eof() && *buf->position() == ';') - { - ++buf->position(); - skipWhitespaceIfAny(*buf); - } - assertEOF(*buf); -} bool JSONAsRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { @@ -104,8 +75,8 @@ void JSONAsRowInputFormat::setReadBuffer(ReadBuffer & in_) JSONAsStringRowInputFormat::JSONAsStringRowInputFormat( - const Block & header_, ReadBuffer & in_, Params params_) - : JSONAsRowInputFormat(header_, in_, params_) + const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) + : JSONAsRowInputFormat(header_, in_, params_, format_settings_) { if (!isString(removeNullable(removeLowCardinality(header_.getByPosition(0).type)))) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -193,8 +164,7 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) JSONAsObjectRowInputFormat::JSONAsObjectRowInputFormat( const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) - : JSONAsRowInputFormat(header_, in_, params_) - , format_settings(format_settings_) + : JSONAsRowInputFormat(header_, in_, params_, format_settings_) { if (!isObject(header_.getByPosition(0).type)) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -207,6 +177,13 @@ void JSONAsObjectRowInputFormat::readJSONObject(IColumn & column) serializations[0]->deserializeTextJSON(column, *buf, format_settings); } +Chunk JSONAsObjectRowInputFormat::getChunkForCount(size_t rows) +{ + auto object_type = getPort().getHeader().getDataTypes()[0]; + ColumnPtr column = object_type->createColumnConst(rows, 
Field(Object())); + return Chunk({std::move(column)}, rows); +} + JSONAsObjectExternalSchemaReader::JSONAsObjectExternalSchemaReader(const FormatSettings & settings) { if (!settings.json.allow_object_type) @@ -222,9 +199,9 @@ void registerInputFormatJSONAsString(FormatFactory & factory) ReadBuffer & buf, const Block & sample, const RowInputFormatParams & params, - const FormatSettings &) + const FormatSettings & format_settings) { - return std::make_shared(sample, buf, params); + return std::make_shared(sample, buf, params, format_settings); }); } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h index 3f2d1998139..4312a853193 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -13,30 +13,22 @@ namespace DB class ReadBuffer; /// This format parses a sequence of JSON objects separated by newlines, spaces and/or comma. -class JSONAsRowInputFormat : public IRowInputFormat +class JSONAsRowInputFormat : public JSONEachRowRowInputFormat { public: - JSONAsRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); + JSONAsRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings); void resetParser() override; void setReadBuffer(ReadBuffer & in_) override; private: - JSONAsRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_); + JSONAsRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_, const FormatSettings & format_settings); bool readRow(MutableColumns & columns, RowReadExtension & ext) override; - void readPrefix() override; - void readSuffix() override; - protected: virtual void readJSONObject(IColumn & column) = 0; std::unique_ptr buf; - -private: - /// This flag is needed to know if data is in square brackets. - bool data_in_square_brackets = false; - bool allow_new_rows = true; }; /// Each JSON object is parsed as a whole to string. 
@@ -44,7 +36,7 @@ private: class JSONAsStringRowInputFormat final : public JSONAsRowInputFormat { public: - JSONAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); + JSONAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings); String getName() const override { return "JSONAsStringRowInputFormat"; } private: @@ -61,8 +53,8 @@ public: String getName() const override { return "JSONAsObjectRowInputFormat"; } private: + Chunk getChunkForCount(size_t rows) override; void readJSONObject(IColumn & column) override; - const FormatSettings format_settings; }; class JSONAsStringExternalSchemaReader : public IExternalSchemaReader diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 3cdeb0199b3..6e7e8e6c98a 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -1,12 +1,10 @@ #include #include #include -#include #include #include #include #include -#include namespace DB { @@ -52,14 +50,22 @@ void JSONColumnsReaderBase::skipColumn() while (!in->eof() && balance) { if (inside_quotes) - pos = find_first_symbols<'"'>(in->position(), in->buffer().end()); + pos = find_first_symbols<'\\', '"'>(in->position(), in->buffer().end()); else - pos = find_first_symbols<'[', ']', '"'>(in->position(), in->buffer().end()); + pos = find_first_symbols<'[', ']', '"', '\\'>(in->position(), in->buffer().end()); in->position() = pos; if (in->position() == in->buffer().end()) continue; + if (*in->position() == '\\') + { + ++in->position(); + if (!in->eof()) + ++in->position(); + continue; + } + if (*in->position() == '"') inside_quotes = !inside_quotes; else if (*in->position() == ']') @@ -118,6 +124,31 @@ Chunk JSONColumnsBlockInputFormatBase::generate() return Chunk(std::move(columns), 0); size_t chunk_start = getDataOffsetMaybeCompressed(*in); + + if (need_only_count) + { + /// Count rows in first column and skip the rest columns. 
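For reference, a standalone sketch of the skipping invariant that the `skipColumn()` change above preserves: bracket depth only changes outside string literals, and a backslash escapes the following character so an escaped quote cannot terminate a string. Not ClickHouse code; `skipJSONArray` is simplified (only `[`/`]` nesting is tracked and malformed input is not diagnosed).

```
// Standalone sketch: skip over one JSON array while tracking quotes,
// backslash escapes, and bracket depth.
#include <cstddef>
#include <iostream>
#include <string>

static size_t skipJSONArray(const std::string & data, size_t pos)
{
    int depth = 0;
    bool inside_quotes = false;
    for (; pos < data.size(); ++pos)
    {
        char c = data[pos];
        if (inside_quotes)
        {
            if (c == '\\')
                ++pos;              // skip the escaped character, e.g. \" or \\
            else if (c == '"')
                inside_quotes = false;
        }
        else if (c == '"')
            inside_quotes = true;
        else if (c == '[')
            ++depth;
        else if (c == ']' && --depth == 0)
            return pos + 1;         // position right after the matching ']'
    }
    return pos;
}

int main()
{
    std::string column = R"(["a \"quoted\" value", "b[]"], "next_column": [1, 2])";
    size_t end = skipJSONArray(column, 0);
    std::cout << "skipped: " << column.substr(0, end) << '\n';
    // prints: skipped: ["a \"quoted\" value", "b[]"]
}
```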
+ reader->readColumnStart(); + size_t num_rows = 0; + if (!reader->checkColumnEnd()) + { + do + { + skipJSONField(*in, "skip_field"); + ++num_rows; + } while (!reader->checkColumnEndOrSkipFieldDelimiter()); + } + + while (!reader->checkChunkEndOrSkipColumnDelimiter()) + { + reader->readColumnStart(); + reader->skipColumn(); + } + + approx_bytes_read_for_chunk = getDataOffsetMaybeCompressed(*in) - chunk_start; + return getChunkForCount(num_rows); + } + std::vector seen_columns(columns.size(), 0); Int64 rows = -1; size_t iteration = 0; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index 5ab20c796ea..bb52e2aa516 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -67,7 +67,7 @@ protected: Serializations serializations; std::unique_ptr reader; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index b91345bebe3..1a8dddab83c 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -88,6 +88,17 @@ void JSONCompactEachRowFormatReader::skipHeaderRow() skipRowEndDelimiter(); } +bool JSONCompactEachRowFormatReader::checkForSuffix() +{ + skipWhitespaceIfAny(*in); + return in->eof(); +} + +void JSONCompactEachRowFormatReader::skipRow() +{ + JSONUtils::skipRowForJSONCompactEachRow(*in); +} + std::vector JSONCompactEachRowFormatReader::readHeaderRow() { skipRowStartDelimiter(); @@ -112,6 +123,12 @@ bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypeP return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings); } +bool JSONCompactEachRowFormatReader::checkForEndOfRow() +{ + skipWhitespaceIfAny(*in); + return !in->eof() && *in->position() == ']'; +} + bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out) { skipWhitespaceIfAny(*in); @@ -187,7 +204,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( { } -DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl() +std::optional JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl() { if (first_row) first_row = false; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index bb699f0ca2e..2e255a55d57 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -37,6 +37,7 @@ public: private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + bool supportsCountRows() const override { return true; } }; class JSONCompactEachRowFormatReader : public FormatWithNamesAndTypesReader @@ -64,10 +65,17 @@ public: void skipFieldDelimiter() override; void skipRowEndDelimiter() override; + void skipRow() override; + + bool checkForSuffix() override; + std::vector readHeaderRow(); std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() const override { return 
format_settings.json.compact_allow_variable_number_of_columns; } + bool yieldStrings() const { return yield_strings; } private: bool yield_strings; @@ -79,7 +87,9 @@ public: JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypesImpl() override; + bool allowVariableNumberOfColumns() const override { return format_settings.json.compact_allow_variable_number_of_columns; } + + std::optional readRowAndGetDataTypesImpl() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; void transformFinalTypeIfNeeded(DataTypePtr & type) override; diff --git a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h index ccd28f8bc50..09f2f77268d 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h @@ -16,6 +16,7 @@ public: private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + bool supportsCountRows() const override { return true; } void readPrefix() override; void readSuffix() override; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e5f52936021..6038ab204a3 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -39,9 +39,9 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( const FormatSettings & format_settings_, bool yield_strings_) : IRowInputFormat(header_, in_, std::move(params_)) - , format_settings(format_settings_) , prev_positions(header_.columns()) , yield_strings(yield_strings_) + , format_settings(format_settings_) { const auto & header = getPort().getHeader(); name_map = header.getNamesToIndexesMap(); @@ -236,10 +236,10 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi bool JSONEachRowRowInputFormat::checkEndOfData(bool is_first_row) { - /// We consume , or \n before scanning a new row, instead scanning to next row at the end. + /// We consume ',' or '\n' before scanning a new row, instead scanning to next row at the end. /// The reason is that if we want an exact number of rows read with LIMIT x /// from a streaming table engine with text data format, like File or Kafka - /// then seeking to next ;, or \n would trigger reading of an extra row at the end. + /// then seeking to next ';,' or '\n' would trigger reading of an extra row at the end. /// Semicolon is added for convenience as it could be used at end of INSERT query. 
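For reference, a standalone sketch of why the separator is consumed before the next row rather than after the current one: reading exactly N rows then never touches any byte of row N+1, which matters for LIMIT over streaming sources. Not ClickHouse code; rows here are plain newline/comma-separated strings and the semicolon handling is omitted.

```
// Standalone sketch: consume the row separator *before* each row, so that
// stopping after N rows leaves the stream positioned right after row N.
#include <iostream>
#include <sstream>
#include <string>

static std::string readOneRow(std::istream & in)
{
    // Consume separators left over from the previous row first.
    while (in.peek() == '\n' || in.peek() == ',')
        in.get();

    std::string row;
    while (in.peek() != std::char_traits<char>::eof() && in.peek() != '\n' && in.peek() != ',')
        row += static_cast<char>(in.get());
    return row; // the separator after this row is deliberately left in the stream
}

int main()
{
    std::istringstream data("{\"x\":1}\n{\"x\":2}\n{\"x\":3}\n");
    const size_t limit = 2;
    for (size_t i = 0; i < limit; ++i)
        std::cout << readOneRow(data) << '\n';

    // Only the bytes of the first two rows were consumed; row 3 was never touched.
    std::cout << "stream position: " << data.tellg() << '\n'; // prints "stream position: 15"
}
```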
if (!in->eof()) @@ -302,6 +302,26 @@ void JSONEachRowRowInputFormat::readSuffix() assertEOF(*in); } +size_t JSONEachRowRowInputFormat::countRows(size_t max_block_size) +{ + if (unlikely(!allow_new_rows)) + return 0; + + size_t num_rows = 0; + bool is_first_row = getCurrentUnitNumber() == 0 && getTotalRows() == 0; + skipWhitespaceIfAny(*in); + while (num_rows < max_block_size && !checkEndOfData(is_first_row)) + { + skipRowStart(); + JSONUtils::skipRowForJSONEachRow(*in); + ++num_rows; + is_first_row = false; + skipWhitespaceIfAny(*in); + } + + return num_rows; +} + JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowWithNamesSchemaReader(in_, format_settings_) { diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index ce42071585e..d4246c37ea0 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -40,6 +40,9 @@ private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + size_t countRows(size_t max_block_size) override; + bool supportsCountRows() const override { return true; } + const String & columnName(size_t i) const; size_t columnIndex(StringRef name, size_t key_index); bool advanceToNextKey(size_t key_index); @@ -50,9 +53,7 @@ private: void readNestedData(const String & name, MutableColumns & columns); virtual void readRowStart(MutableColumns &) {} - virtual bool checkEndOfData(bool is_first_row); - - const FormatSettings format_settings; + virtual void skipRowStart() {} /// Buffer for the read from the stream field name. Used when you have to copy it. /// Also, if processing of Nested data is in progress, it holds the common prefix @@ -76,11 +77,12 @@ private: /// Cached search results for previous row (keyed as index in JSON object) - used as a hint. std::vector prev_positions; - bool allow_new_rows = true; - bool yield_strings; protected: + virtual bool checkEndOfData(bool is_first_row); + + const FormatSettings format_settings; /// Set of columns for which the values were read. The rest will be filled with default values. std::vector read_columns; @@ -89,6 +91,8 @@ protected: /// This flag is needed to know if data is in square brackets. 
bool data_in_square_brackets = false; + + bool allow_new_rows = true; }; class JSONEachRowSchemaReader : public IRowWithNamesSchemaReader diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index f01f07024da..b3d5eb6cb0c 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -55,6 +55,11 @@ void JSONObjectEachRowInputFormat::readRowStart(MutableColumns & columns) } } +void JSONObjectEachRowInputFormat::skipRowStart() +{ + JSONUtils::readFieldName(*in); +} + bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row) { if (in->eof() || JSONUtils::checkAndSkipObjectEnd(*in)) diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h index a15bc558c65..14df8e624f0 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h @@ -29,6 +29,7 @@ private: void readPrefix() override; void readSuffix() override {} void readRowStart(MutableColumns & columns) override; + void skipRowStart() override; bool checkEndOfData(bool is_first_row) override; std::optional field_index_for_object_name; diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp index 950bf8e5731..ac7ba6048a5 100644 --- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 2fd87540e7d..036539c87e7 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -51,6 +51,18 @@ bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtens return true; } +size_t LineAsStringRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + while (!in->eof() && num_rows < max_block_size) + { + skipToNextLineOrEOF(*in); + ++num_rows; + } + + return num_rows; +} + void registerInputFormatLineAsString(FormatFactory & factory) { factory.registerInputFormat("LineAsString", []( diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h index 080ff9985af..3803056123f 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h @@ -26,6 +26,9 @@ private: bool readRow(MutableColumns & columns, RowReadExtension & ext) override; void readLineObject(IColumn & column); + + size_t countRows(size_t max_block_size) override; + bool supportsCountRows() const override { return true; } }; class LinaAsStringSchemaReader : public IExternalSchemaReader diff --git a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp index ea414171ed6..00bb5ff6fcf 100644 --- a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp @@ -52,7 +52,7 @@ void MarkdownRowOutputFormat::writeRowEndDelimiter() void MarkdownRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - 
serialization.serializeTextEscaped(column, row_num, out, format_settings); + serialization.serializeTextMarkdown(column, row_num, out, format_settings); } void registerOutputFormatMarkdown(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index eeca14176cc..7678d1d3ae2 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -244,7 +244,8 @@ static void insertString(IColumn & column, DataTypePtr type, const char * value, case TypeIndex::Decimal256: insertFromBinaryRepresentation>(column, type, value, size); return; - default:; + default: + break; } } @@ -326,8 +327,8 @@ static void insertUUID(IColumn & column, DataTypePtr type, const char * value, s throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName()); ReadBufferFromMemory buf(value, size); UUID uuid; - readBinaryBigEndian(uuid.toUnderType().items[0], buf); - readBinaryBigEndian(uuid.toUnderType().items[1], buf); + readBinaryBigEndian(UUIDHelpers::getHighBytes(uuid), buf); + readBinaryBigEndian(UUIDHelpers::getLowBytes(uuid), buf); assert_cast(column).insertValue(uuid); } @@ -482,14 +483,15 @@ void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT throw Exception(ErrorCodes::INCORRECT_DATA, "Error occurred while parsing msgpack data."); } -bool MsgPackRowInputFormat::readObject() +template +bool MsgPackRowInputFormat::readObject(Parser & msgpack_parser) { if (buf->eof()) return false; PeekableReadBufferCheckpoint checkpoint{*buf}; size_t offset = 0; - while (!parser.execute(buf->position(), buf->available(), offset)) + while (!msgpack_parser.execute(buf->position(), buf->available(), offset)) { buf->position() = buf->buffer().end(); if (buf->eof()) @@ -502,6 +504,24 @@ bool MsgPackRowInputFormat::readObject() return true; } +size_t MsgPackRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + msgpack::null_visitor null_visitor; + msgpack::detail::parse_helper null_parser(null_visitor); + + size_t columns = getPort().getHeader().columns(); + + while (!buf->eof() && num_rows < max_block_size) + { + for (size_t i = 0; i < columns; ++i) + readObject(null_parser); + ++num_rows; + } + + return num_rows; +} + bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) { size_t column_index = 0; @@ -510,7 +530,7 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & for (; column_index != columns.size(); ++column_index) { visitor.set_info(*columns[column_index], data_types[column_index], ext.read_columns[column_index]); - has_more_data = readObject(); + has_more_data = readObject(parser); if (!has_more_data) break; } @@ -634,7 +654,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) UNREACHABLE(); } -DataTypes MsgPackSchemaReader::readRowAndGetDataTypes() +std::optional MsgPackSchemaReader::readRowAndGetDataTypes() { if (buf.eof()) return {}; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 0b485d3b97c..3876b372670 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -75,7 +75,11 @@ private: bool readRow(MutableColumns & columns, RowReadExtension & ext) override; - bool readObject(); + template + bool readObject(Parser & msgpack_parser); + + size_t 
countRows(size_t max_block_size) override; + bool supportsCountRows() const override { return true; } std::unique_ptr buf; MsgPackVisitor visitor; @@ -91,7 +95,7 @@ public: private: msgpack::object_handle readObject(); DataTypePtr getDataType(const msgpack::object & object); - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; PeekableReadBuffer buf; UInt64 number_of_columns; diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index 9c601492217..12bbd35b77b 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -270,8 +270,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr { WriteBufferFromOwnString buf; UUID value = uuid_column.getElement(row_num); - writeBinaryBigEndian(value.toUnderType().items[0], buf); - writeBinaryBigEndian(value.toUnderType().items[1], buf); + writeBinaryBigEndian(UUIDHelpers::getHighBytes(value), buf); + writeBinaryBigEndian(UUIDHelpers::getLowBytes(value), buf); std::string_view uuid_ext = buf.stringView(); packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUIDType)); packer.pack_ext_body(uuid_ext.data(), static_cast(uuid_ext.size())); diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 90dd07bd5a8..6ae32aa5842 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -1,4 +1,5 @@ -#include "MySQLDumpRowInputFormat.h" +#include +#include #include #include #include @@ -303,15 +304,8 @@ static void skipFieldDelimiter(ReadBuffer & in) skipWhitespaceIfAny(in); } -static void skipEndOfRow(ReadBuffer & in, String & table_name) +static void skipEndOfInsertQueryIfNeeded(ReadBuffer & in, String & table_name) { - skipWhitespaceIfAny(in); - assertChar(')', in); - - skipWhitespaceIfAny(in); - if (!in.eof() && *in.position() == ',') - ++in.position(); - skipWhitespaceIfAny(in); if (!in.eof() && *in.position() == ';') { @@ -323,6 +317,18 @@ static void skipEndOfRow(ReadBuffer & in, String & table_name) } } +static void skipEndOfRow(ReadBuffer & in, String & table_name) +{ + skipWhitespaceIfAny(in); + assertChar(')', in); + + skipWhitespaceIfAny(in); + if (!in.eof() && *in.position() == ',') + ++in.position(); + + skipEndOfInsertQueryIfNeeded(in, table_name); +} + static void readFirstCreateAndInsertQueries(ReadBuffer & in, String & table_name, NamesAndTypesList & structure_from_create, Names & column_names) { auto type = skipToInsertOrCreateQuery(table_name, in); @@ -385,6 +391,19 @@ bool MySQLDumpRowInputFormat::readRow(MutableColumns & columns, RowReadExtension return true; } +size_t MySQLDumpRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + while (!in->eof() && num_rows < max_block_size) + { + ValuesBlockInputFormat::skipToNextRow(in, 1, 0); + skipEndOfInsertQueryIfNeeded(*in, table_name); + ++num_rows; + } + + return num_rows; +} + bool MySQLDumpRowInputFormat::readField(IColumn & column, size_t column_idx) { const auto & type = types[column_idx]; @@ -422,7 +441,7 @@ NamesAndTypesList MySQLDumpSchemaReader::readSchema() return IRowSchemaReader::readSchema(); } -DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() +std::optional MySQLDumpSchemaReader::readRowAndGetDataTypes() { if (in.eof()) return {}; diff --git 
a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h index c28355054d7..4148b6e79a3 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h @@ -20,6 +20,9 @@ private: bool readField(IColumn & column, size_t column_idx); void skipField(); + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + String table_name; DataTypes types; Block::NameMap column_indexes_by_names; @@ -33,7 +36,7 @@ public: private: NamesAndTypesList readSchema() override; - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; String table_name; }; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index f8c9a39eedf..65ea87479a3 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -66,7 +66,7 @@ private: std::unique_ptr reader; Block header; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; class NativeOutputFormat final : public IOutputFormat diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp new file mode 100644 index 00000000000..1b6cde11be7 --- /dev/null +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -0,0 +1,1023 @@ +#include "NativeORCBlockInputFormat.h" + +#if USE_ORC +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include "ArrowBufferedStreams.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_TYPE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + extern const int THERE_IS_NO_COLUMN; + extern const int INCORRECT_DATA; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +ORCInputStream::ORCInputStream(SeekableReadBuffer & in_, size_t file_size_) : in(in_), file_size(file_size_) +{ +} + +uint64_t ORCInputStream::getLength() const +{ + return file_size; +} + +uint64_t ORCInputStream::getNaturalReadSize() const +{ + return 128 * 1024; +} + +void ORCInputStream::read(void * buf, uint64_t length, uint64_t offset) +{ + if (offset != static_cast(in.getPosition())) + in.seek(offset, SEEK_SET); + + in.readStrict(reinterpret_cast(buf), length); +} + +std::unique_ptr asORCInputStream(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled) +{ + bool has_file_size = isBufferWithFileSize(in); + auto * seekable_in = dynamic_cast(&in); + + if (has_file_size && seekable_in && settings.seekable_read && seekable_in->checkIfActuallySeekable()) + return std::make_unique(*seekable_in, getFileSizeFromReadBuffer(in)); + + /// Fallback to loading the entire file in memory + return asORCInputStreamLoadIntoMemory(in, is_cancelled); +} + +std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in, std::atomic & is_cancelled) +{ + size_t magic_size = strlen(ORC_MAGIC_BYTES); + std::string file_data(magic_size, '\0'); + + /// Avoid loading the whole file if it doesn't seem to even be in the correct format. 
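// A self-contained sketch of the same "probe the magic bytes before buffering the whole
// file" idea used in asORCInputStreamLoadIntoMemory, assuming only the standard library:
// std::istream stands in for ReadBuffer and the "ORC" literal for ORC_MAGIC_BYTES, so this
// is illustrative rather than the real ClickHouse API.
#include <istream>
#include <sstream>
#include <stdexcept>
#include <string>

std::string readAllIfLooksLikeORC(std::istream & in)
{
    static const std::string magic = "ORC";
    std::string data(magic.size(), '\0');
    in.read(data.data(), static_cast<std::streamsize>(magic.size()));
    if (static_cast<size_t>(in.gcount()) < magic.size() || data != magic)
        throw std::runtime_error("Not an ORC file");

    // Only after the cheap prefix check do we pay for keeping the whole stream in memory.
    std::ostringstream rest;
    rest << in.rdbuf();
    return data + rest.str();
}
// The lines below perform the same probe against the ReadBuffer-based input.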
+ size_t bytes_read = in.read(file_data.data(), magic_size); + if (bytes_read < magic_size || file_data != ORC_MAGIC_BYTES) + throw Exception(ErrorCodes::INCORRECT_DATA, "Not an ORC file"); + + WriteBufferFromString file_buffer(file_data, AppendModeTag{}); + copyData(in, file_buffer, is_cancelled); + file_buffer.finalize(); + + size_t file_size = file_data.size(); + return std::make_unique(std::move(file_data), file_size); +} + +static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_with_unsupported_types, bool & skipped) +{ + assert(orc_type != nullptr); + + const int subtype_count = static_cast(orc_type->getSubtypeCount()); + switch (orc_type->getKind()) + { + case orc::TypeKind::BOOLEAN: + return DataTypeFactory::instance().get("Bool"); + case orc::TypeKind::BYTE: + return std::make_shared(); + case orc::TypeKind::SHORT: + return std::make_shared(); + case orc::TypeKind::INT: + return std::make_shared(); + case orc::TypeKind::LONG: + return std::make_shared(); + case orc::TypeKind::FLOAT: + return std::make_shared(); + case orc::TypeKind::DOUBLE: + return std::make_shared(); + case orc::TypeKind::DATE: + return std::make_shared(); + case orc::TypeKind::TIMESTAMP: + return std::make_shared(9); + case orc::TypeKind::VARCHAR: + case orc::TypeKind::BINARY: + case orc::TypeKind::STRING: + return std::make_shared(); + case orc::TypeKind::CHAR: + return std::make_shared(orc_type->getMaximumLength()); + case orc::TypeKind::DECIMAL: { + UInt64 precision = orc_type->getPrecision(); + UInt64 scale = orc_type->getScale(); + if (precision == 0) + { + // In HIVE 0.11/0.12 precision is set as 0, but means max precision + return createDecimal(38, 6); + } + else + return createDecimal(precision, scale); + } + case orc::TypeKind::LIST: { + if (subtype_count != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc List type {}", orc_type->toString()); + + DataTypePtr nested_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + return std::make_shared(nested_type); + } + case orc::TypeKind::MAP: { + if (subtype_count != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc Map type {}", orc_type->toString()); + + DataTypePtr key_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + DataTypePtr value_type = parseORCType(orc_type->getSubtype(1), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + return std::make_shared(key_type, value_type); + } + case orc::TypeKind::STRUCT: { + DataTypes nested_types; + Strings nested_names; + nested_types.reserve(subtype_count); + nested_names.reserve(subtype_count); + + for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) + { + auto parsed_type = parseORCType(orc_type->getSubtype(i), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + nested_types.push_back(parsed_type); + nested_names.push_back(orc_type->getFieldName(i)); + } + return std::make_shared(nested_types, nested_names); + } + default: { + if (skip_columns_with_unsupported_types) + { + skipped = true; + return {}; + } + + throw Exception( + ErrorCodes::UNKNOWN_TYPE, + "Unsupported ORC type '{}'." 
+ "If you want to skip columns with unsupported types, " + "you can enable setting input_format_orc_skip_columns_with_unsupported_types_in_schema_inference", + orc_type->toString()); + } + } +} + + +static void getFileReaderAndSchema( + ReadBuffer & in, + std::unique_ptr & file_reader, + Block & header, + const FormatSettings & format_settings, + std::atomic & is_stopped) +{ + if (is_stopped) + return; + + orc::ReaderOptions options; + auto input_stream = asORCInputStream(in, format_settings, is_stopped); + file_reader = orc::createReader(std::move(input_stream), options); + const auto & schema = file_reader->getType(); + + for (size_t i = 0; i < schema.getSubtypeCount(); ++i) + { + const std::string & name = schema.getFieldName(i); + const orc::Type * orc_type = schema.getSubtype(i); + + bool skipped = false; + DataTypePtr type = parseORCType(orc_type, format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, skipped); + if (!skipped) + header.insert(ColumnWithTypeAndName{type, name}); + } +} + +NativeORCBlockInputFormat::NativeORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) + : IInputFormat(std::move(header_), &in_), format_settings(format_settings_), skip_stripes(format_settings.orc.skip_stripes) +{ +} + +void NativeORCBlockInputFormat::prepareFileReader() +{ + Block schema; + getFileReaderAndSchema(*in, file_reader, schema, format_settings, is_stopped); + if (is_stopped) + return; + + total_stripes = static_cast(file_reader->getNumberOfStripes()); + current_stripe = -1; + + orc_column_to_ch_column = std::make_unique( + getPort().getHeader(), + format_settings.orc.allow_missing_columns, + format_settings.null_as_default, + format_settings.orc.case_insensitive_column_matching); + + const bool ignore_case = format_settings.orc.case_insensitive_column_matching; + std::unordered_set nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); + + for (size_t i = 0; i < schema.columns(); ++i) + { + const auto & name = schema.getByPosition(i).name; + if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? 
boost::to_lower_copy(name) : name)) + include_indices.push_back(static_cast(i)); + } +} + +bool NativeORCBlockInputFormat::prepareStripeReader() +{ + assert(file_reader); + + ++current_stripe; + for (; current_stripe < total_stripes && skip_stripes.contains(current_stripe); ++current_stripe) + ; + + /// No more stripes to read + if (current_stripe >= total_stripes) + return false; + + current_stripe_info = file_reader->getStripe(current_stripe); + if (!current_stripe_info->getNumberOfRows()) + throw Exception(ErrorCodes::INCORRECT_DATA, "ORC stripe {} has no rows", current_stripe); + + orc::RowReaderOptions row_reader_options; + row_reader_options.include(include_indices); + row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); + stripe_reader = file_reader->createRowReader(row_reader_options); + + if (!batch) + batch = stripe_reader->createRowBatch(format_settings.orc.row_batch_size); + + return true; +} + +Chunk NativeORCBlockInputFormat::generate() +{ + block_missing_values.clear(); + + if (!file_reader) + prepareFileReader(); + + if (need_only_count) + { + ++current_stripe; + for (; current_stripe < total_stripes && skip_stripes.contains(current_stripe); ++current_stripe) + ; + + if (current_stripe >= total_stripes) + return {}; + + return getChunkForCount(file_reader->getStripe(current_stripe)->getNumberOfRows()); + } + + if (!stripe_reader) + { + if (!prepareStripeReader()) + return {}; + } + + if (is_stopped) + return {}; + + while (true) + { + bool ok = stripe_reader->next(*batch); + if (ok) + break; + + /// No more rows to read in current stripe, continue to prepare reading next stripe + if (!prepareStripeReader()) + return {}; + } + + Chunk res; + size_t num_rows = batch->numElements; + const auto & schema = stripe_reader->getSelectedType(); + orc_column_to_ch_column->orcTableToCHChunk(res, &schema, batch.get(), num_rows, &block_missing_values); + + approx_bytes_read_for_chunk = num_rows * current_stripe_info->getLength() / current_stripe_info->getNumberOfRows(); + return res; +} + +void NativeORCBlockInputFormat::resetParser() +{ + IInputFormat::resetParser(); + + file_reader.reset(); + stripe_reader.reset(); + include_indices.clear(); + batch.reset(); + block_missing_values.clear(); +} + +const BlockMissingValues & NativeORCBlockInputFormat::getMissingValues() const +{ + return block_missing_values; +} + +NativeORCSchemaReader::NativeORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : ISchemaReader(in_), format_settings(format_settings_) +{ +} + +NamesAndTypesList NativeORCSchemaReader::readSchema() +{ + Block header; + std::unique_ptr file_reader; + std::atomic is_stopped = 0; + getFileReaderAndSchema(in, file_reader, header, format_settings, is_stopped); + + if (format_settings.schema_inference_make_columns_nullable) + return getNamesAndRecursivelyNullableTypes(header); + return header.getNamesAndTypesList(); +} + + +ORCColumnToCHColumn::ORCColumnToCHColumn( + const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) + : header(header_) + , allow_missing_columns(allow_missing_columns_) + , null_as_default(null_as_default_) + , case_insensitive_matching(case_insensitive_matching_) +{ +} + +void ORCColumnToCHColumn::orcTableToCHChunk( + Chunk & res, const orc::Type * schema, const orc::ColumnVectorBatch * table, size_t num_rows, BlockMissingValues * block_missing_values) +{ + const auto * struct_batch = dynamic_cast(table); + if (!struct_batch) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "ORC table must be StructVectorBatch but is {}", struct_batch->toString()); + + if (schema->getSubtypeCount() != struct_batch->fields.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "ORC table has {} fields but schema has {}", struct_batch->fields.size(), schema->getSubtypeCount()); + + size_t field_num = struct_batch->fields.size(); + NameToColumnPtr name_to_column_ptr; + for (size_t i = 0; i < field_num; ++i) + { + auto name = schema->getFieldName(i); + const auto * field = struct_batch->fields[i]; + if (!field) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ORC table field {} is null", name); + + if (case_insensitive_matching) + boost::to_lower(name); + + name_to_column_ptr[std::move(name)] = {field, schema->getSubtype(i)}; + } + + orcColumnsToCHChunk(res, name_to_column_ptr, num_rows, block_missing_values); +} + +/// Creates a null bytemap from ORC's not-null bytemap +static ColumnPtr readByteMapFromORCColumn(const orc::ColumnVectorBatch * orc_column) +{ + if (!orc_column->hasNulls) + return ColumnUInt8::create(orc_column->numElements, 0); + + auto nullmap_column = ColumnUInt8::create(); + PaddedPODArray & bytemap_data = assert_cast &>(*nullmap_column).getData(); + bytemap_data.resize(orc_column->numElements); + + for (size_t i = 0; i < orc_column->numElements; ++i) + bytemap_data[i] = 1 - orc_column->notNull[i]; + return nullmap_column; +} + + +static const orc::ColumnVectorBatch * getNestedORCColumn(const orc::ListVectorBatch * orc_column) +{ + return orc_column->elements.get(); +} + +template +static ColumnPtr readOffsetsFromORCListColumn(const BatchType * orc_column) +{ + auto offsets_column = ColumnUInt64::create(); + ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData(); + offsets_data.reserve(orc_column->numElements); + + for (size_t i = 0; i < orc_column->numElements; ++i) + offsets_data.push_back(orc_column->offsets[i + 1]); + + return offsets_column; +} + +static ColumnWithTypeAndName +readColumnWithBooleanData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + const auto * orc_bool_column = dynamic_cast(orc_column); + auto internal_type = DataTypeFactory::instance().get("Bool"); + auto internal_column = internal_type->createColumn(); + auto & column_data = assert_cast &>(*internal_column).getData(); + column_data.reserve(orc_bool_column->numElements); + + for (size_t i = 0; i < orc_bool_column->numElements; ++i) + column_data.push_back(static_cast(orc_bool_column->data[i])); + + return {std::move(internal_column), internal_type, column_name}; +} + +/// Inserts numeric data right into internal column data to reduce an overhead +template > +static ColumnWithTypeAndName +readColumnWithNumericData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + auto internal_type = std::make_shared>(); + auto internal_column = internal_type->createColumn(); + auto & column_data = static_cast(*internal_column).getData(); + column_data.reserve(orc_column->numElements); + + const auto * orc_int_column = dynamic_cast(orc_column); + column_data.insert_assume_reserved(orc_int_column->data.data(), orc_int_column->data.data() + orc_int_column->numElements); + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +template > +static ColumnWithTypeAndName +readColumnWithNumericDataCast(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + auto internal_type = 
std::make_shared>(); + auto internal_column = internal_type->createColumn(); + auto & column_data = static_cast(*internal_column).getData(); + column_data.reserve(orc_column->numElements); + + const auto * orc_int_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_int_column->numElements; ++i) + column_data.push_back(static_cast(orc_int_column->data[i])); + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName +readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + PaddedPODArray & column_chars_t = assert_cast(*internal_column).getChars(); + PaddedPODArray & column_offsets = assert_cast(*internal_column).getOffsets(); + + const auto * orc_str_column = dynamic_cast(orc_column); + size_t reserver_size = 0; + for (size_t i = 0; i < orc_str_column->numElements; ++i) + reserver_size += orc_str_column->length[i] + 1; + column_chars_t.reserve(reserver_size); + column_offsets.reserve(orc_str_column->numElements); + + size_t curr_offset = 0; + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + const auto * buf = orc_str_column->data[i]; + if (buf) + { + size_t buf_size = orc_str_column->length[i]; + column_chars_t.insert_assume_reserved(buf, buf + buf_size); + curr_offset += buf_size; + } + + column_chars_t.push_back(0); + ++curr_offset; + + column_offsets.push_back(curr_offset); + } + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName +readColumnWithFixedStringData(const orc::ColumnVectorBatch * orc_column, const orc::Type * orc_type, const String & column_name) +{ + size_t fixed_len = orc_type->getMaximumLength(); + auto internal_type = std::make_shared(fixed_len); + auto internal_column = internal_type->createColumn(); + PaddedPODArray & column_chars_t = assert_cast(*internal_column).getChars(); + column_chars_t.reserve(orc_column->numElements * fixed_len); + + const auto * orc_str_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (orc_str_column->data[i]) + column_chars_t.insert_assume_reserved(orc_str_column->data[i], orc_str_column->data[i] + orc_str_column->length[i]); + else + column_chars_t.resize_fill(column_chars_t.size() + fixed_len); + } + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + + +template > +static ColumnWithTypeAndName readColumnWithDecimalDataCast( + const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name, DataTypePtr internal_type) +{ + using NativeType = typename DecimalType::NativeType; + static_assert(std::is_same_v || std::is_same_v); + + auto internal_column = internal_type->createColumn(); + auto & column_data = static_cast(*internal_column).getData(); + column_data.reserve(orc_column->numElements); + + const auto * orc_decimal_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_decimal_column->numElements; ++i) + { + DecimalType decimal_value; + if constexpr (std::is_same_v) + { + Int128 int128_value; + int128_value.items[0] = orc_decimal_column->values[i].getLowBits(); + int128_value.items[1] = orc_decimal_column->values[i].getHighBits(); + decimal_value.value = static_cast(int128_value); + } + else + decimal_value.value = static_cast(orc_decimal_column->values[i]); + + column_data.push_back(std::move(decimal_value)); + } + + return 
{std::move(internal_column), internal_type, column_name}; +} + +static ColumnWithTypeAndName +readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const orc::Type * orc_type, const String & column_name) +{ + const auto * orc_str_column = dynamic_cast(orc_column); + + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + /// If at least one value size is not 16 bytes, fallback to reading String column and further cast to IPv6. + if (orc_str_column->data[i] && orc_str_column->length[i] != sizeof(IPv6)) + return readColumnWithStringData(orc_column, orc_type, column_name); + } + + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + auto & ipv6_column = assert_cast(*internal_column); + ipv6_column.reserve(orc_str_column->numElements); + + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (!orc_str_column->data[i]) [[unlikely]] + ipv6_column.insertDefault(); + else + ipv6_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); + } + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName +readIPv4ColumnWithInt32Data(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + const auto * orc_int_column = dynamic_cast(orc_column); + + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + auto & column_data = assert_cast(*internal_column).getData(); + column_data.reserve(orc_int_column->numElements); + + for (size_t i = 0; i < orc_int_column->numElements; ++i) + column_data.push_back(static_cast(orc_int_column->data[i])); + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +template +static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData( + const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name, const DataTypePtr & column_type) +{ + const auto * orc_str_column = dynamic_cast(orc_column); + + auto internal_column = column_type->createColumn(); + auto & integer_column = assert_cast(*internal_column); + integer_column.reserve(orc_str_column->numElements); + + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (!orc_str_column->data[i]) [[unlikely]] + integer_column.insertDefault(); + else + { + if (sizeof(typename ColumnType::ValueType) != orc_str_column->length[i]) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "ValueType size {} of column {} is not equal to size of binary data {}", + sizeof(typename ColumnType::ValueType), + integer_column.getName(), + orc_str_column->length[i]); + + integer_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); + } + } + return {std::move(internal_column), column_type, column_name}; +} + +static ColumnWithTypeAndName readColumnWithDateData( + const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name, const DataTypePtr & type_hint) +{ + DataTypePtr internal_type; + bool check_date_range = false; + /// Make result type Date32 when requested type is actually Date32 or when we use schema inference + if (!type_hint || (type_hint && isDate32(*type_hint))) + { + internal_type = std::make_shared(); + check_date_range = true; + } + else + { + internal_type = std::make_shared(); + } + + const auto * orc_int_column = dynamic_cast(orc_column); + auto internal_column = internal_type->createColumn(); + PaddedPODArray & column_data = assert_cast &>(*internal_column).getData(); + 
column_data.reserve(orc_int_column->numElements); + + for (size_t i = 0; i < orc_int_column->numElements; ++i) + { + Int32 days_num = static_cast(orc_int_column->data[i]); + if (check_date_range && (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM || days_num < -DAYNUM_OFFSET_EPOCH)) + throw Exception( + ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column \"{}\" exceeds the range of type Date32", + days_num, + column_name); + + column_data.push_back(days_num); + } + + return {std::move(internal_column), internal_type, column_name}; +} + +static ColumnWithTypeAndName +readColumnWithTimestampData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + const auto * orc_ts_column = dynamic_cast(orc_column); + + auto internal_type = std::make_shared(9); + auto internal_column = internal_type->createColumn(); + auto & column_data = assert_cast(*internal_column).getData(); + column_data.reserve(orc_ts_column->numElements); + + constexpr Int64 multiplier = 1e9L; + for (size_t i = 0; i < orc_ts_column->numElements; ++i) + { + Decimal64 decimal64; + decimal64.value = orc_ts_column->data[i] * multiplier + orc_ts_column->nanoseconds[i]; + column_data.emplace_back(std::move(decimal64)); + } + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName readColumnFromORCColumn( + const orc::ColumnVectorBatch * orc_column, + const orc::Type * orc_type, + const std::string & column_name, + bool inside_nullable, + DataTypePtr type_hint = nullptr) +{ + bool skipped = false; + + if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable())) + && (orc_type->getKind() != orc::LIST && orc_type->getKind() != orc::MAP && orc_type->getKind() != orc::STRUCT)) + { + DataTypePtr nested_type_hint; + if (type_hint) + nested_type_hint = removeNullable(type_hint); + + auto nested_column = readColumnFromORCColumn(orc_column, orc_type, column_name, true, nested_type_hint); + + auto nullmap_column = readByteMapFromORCColumn(orc_column); + auto nullable_type = std::make_shared(std::move(nested_column.type)); + auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); + return {std::move(nullable_column), std::move(nullable_type), column_name}; + } + + switch (orc_type->getKind()) + { + case orc::STRING: + case orc::BINARY: + case orc::VARCHAR: { + if (type_hint) + { + switch (type_hint->getTypeId()) + { + case TypeIndex::IPv6: + return readIPv6ColumnFromBinaryData(orc_column, orc_type, column_name); + /// ORC format outputs big integers as binary column, because there is no fixed binary in ORC. + case TypeIndex::Int128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::Int256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + /// ORC doesn't support Decimal256 as separate type. We read and write it as binary data. 
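// A standalone sketch of reading a wide integer back from fixed-size binary, mirroring
// readColumnWithBigNumberFromBinaryData above. UInt256Stub is a hypothetical stand-in for
// ClickHouse's wide integer type; only the size check and the byte copy are the point here.
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string_view>

struct UInt256Stub { uint64_t limbs[4]; };  // four 64-bit limbs, 32 bytes in total

UInt256Stub readUInt256FromBinary(std::string_view blob)
{
    UInt256Stub value{};
    if (blob.size() != sizeof(value))
        throw std::runtime_error("binary value size does not match the target integer size");
    std::memcpy(&value, blob.data(), sizeof(value));  // byte-for-byte, as the output format wrote it
    return value;
}
// The type-hint dispatch continues below with Decimal256 read back from such binary data.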
+ case TypeIndex::Decimal256: + return readColumnWithBigNumberFromBinaryData>( + orc_column, orc_type, column_name, type_hint); + default:; + } + } + return readColumnWithStringData(orc_column, orc_type, column_name); + } + case orc::CHAR: { + if (type_hint) + { + switch (type_hint->getTypeId()) + { + case TypeIndex::Int128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::Int256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + default:; + } + } + return readColumnWithFixedStringData(orc_column, orc_type, column_name); + } + case orc::BOOLEAN: + return readColumnWithBooleanData(orc_column, orc_type, column_name); + case orc::BYTE: + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + case orc::SHORT: + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + case orc::INT: { + /// ORC format doesn't have unsigned integers and we output IPv4 as Int32. + /// We should allow to read it back from Int32. + if (type_hint && isIPv4(type_hint)) + return readIPv4ColumnWithInt32Data(orc_column, orc_type, column_name); + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + } + case orc::LONG: + return readColumnWithNumericData(orc_column, orc_type, column_name); + case orc::FLOAT: + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + case orc::DOUBLE: + return readColumnWithNumericData(orc_column, orc_type, column_name); + case orc::DATE: + return readColumnWithDateData(orc_column, orc_type, column_name, type_hint); + case orc::TIMESTAMP: + return readColumnWithTimestampData(orc_column, orc_type, column_name); + case orc::DECIMAL: { + auto interal_type = parseORCType(orc_type, false, skipped); + + auto precision = orc_type->getPrecision(); + if (precision == 0) + precision = 38; + + if (precision <= DecimalUtils::max_precision) + return readColumnWithDecimalDataCast(orc_column, orc_type, column_name, interal_type); + else if (precision <= DecimalUtils::max_precision) + return readColumnWithDecimalDataCast(orc_column, orc_type, column_name, interal_type); + else if (precision <= DecimalUtils::max_precision) + return readColumnWithDecimalDataCast( + orc_column, orc_type, column_name, interal_type); + else + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Decimal precision {} in ORC type {} is out of bound", + precision, + orc_type->toString()); + } + case orc::MAP: { + DataTypePtr key_type_hint; + DataTypePtr value_type_hint; + if (type_hint) + { + const auto * map_type_hint = typeid_cast(type_hint.get()); + if (map_type_hint) + { + key_type_hint = map_type_hint->getKeyType(); + value_type_hint = map_type_hint->getValueType(); + } + } + + const auto * orc_map_column = dynamic_cast(orc_column); + const auto * orc_key_column = orc_map_column->keys.get(); + const auto * orc_value_column = orc_map_column->elements.get(); + const auto * orc_key_type = orc_type->getSubtype(0); + const auto * orc_value_type = orc_type->getSubtype(1); + + auto key_column = readColumnFromORCColumn(orc_key_column, orc_key_type, "key", false, key_type_hint); + if (key_type_hint && !key_type_hint->equals(*key_column.type)) + { + /// Cast key column to target type, because it can happen + /// 
that parsed type cannot be ClickHouse Map key type. + key_column.column = castColumn(key_column, key_type_hint); + key_column.type = key_type_hint; + } + + auto value_column = readColumnFromORCColumn(orc_value_column, orc_value_type, "value", false, value_type_hint); + if (skipped) + return {}; + + auto offsets_column = readOffsetsFromORCListColumn(orc_map_column); + auto map_column = ColumnMap::create(key_column.column, value_column.column, offsets_column); + auto map_type = std::make_shared(key_column.type, value_column.type); + return {std::move(map_column), std::move(map_type), column_name}; + } + case orc::LIST: { + DataTypePtr nested_type_hint; + if (type_hint) + { + const auto * array_type_hint = typeid_cast(type_hint.get()); + if (array_type_hint) + nested_type_hint = array_type_hint->getNestedType(); + } + + const auto * orc_list_column = dynamic_cast(orc_column); + const auto * orc_nested_column = getNestedORCColumn(orc_list_column); + const auto * orc_nested_type = orc_type->getSubtype(0); + auto nested_column = readColumnFromORCColumn(orc_nested_column, orc_nested_type, column_name, false, nested_type_hint); + + auto offsets_column = readOffsetsFromORCListColumn(orc_list_column); + auto array_column = ColumnArray::create(nested_column.column, offsets_column); + auto array_type = std::make_shared(nested_column.type); + return {std::move(array_column), std::move(array_type), column_name}; + } + case orc::STRUCT: { + Columns tuple_elements; + DataTypes tuple_types; + std::vector tuple_names; + const auto * tuple_type_hint = type_hint ? typeid_cast(type_hint.get()) : nullptr; + + const auto * orc_struct_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) + { + const auto & field_name = orc_type->getFieldName(i); + + DataTypePtr nested_type_hint; + if (tuple_type_hint) + { + if (tuple_type_hint->haveExplicitNames()) + { + auto pos = tuple_type_hint->tryGetPositionByName(field_name); + if (pos) + nested_type_hint = tuple_type_hint->getElement(*pos); + } + else if (size_t(i) < tuple_type_hint->getElements().size()) + nested_type_hint = tuple_type_hint->getElement(i); + } + + const auto * nested_orc_column = orc_struct_column->fields[i]; + const auto * nested_orc_type = orc_type->getSubtype(i); + auto element = readColumnFromORCColumn(nested_orc_column, nested_orc_type, field_name, false, nested_type_hint); + + tuple_elements.emplace_back(std::move(element.column)); + tuple_types.emplace_back(std::move(element.type)); + tuple_names.emplace_back(std::move(element.name)); + } + + auto tuple_column = ColumnTuple::create(std::move(tuple_elements)); + auto tuple_type = std::make_shared(std::move(tuple_types), std::move(tuple_names)); + return {std::move(tuple_column), std::move(tuple_type), column_name}; + } + default: + throw Exception( + ErrorCodes::UNKNOWN_TYPE, "Unsupported ORC type {} while reading column {}.", orc_type->toString(), column_name); + } +} + +void ORCColumnToCHColumn::orcColumnsToCHChunk( + Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values) +{ + Columns columns_list; + columns_list.reserve(header.columns()); + std::unordered_map>> nested_tables; + for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) + { + const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + + auto search_column_name = header_column.name; + if (case_insensitive_matching) + boost::to_lower(search_column_name); + + ColumnWithTypeAndName column; + if 
(!name_to_column_ptr.contains(search_column_name)) + { + bool read_from_nested = false; + + /// Check if it's a column from nested table. + String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) + { + if (!nested_tables.contains(search_nested_table_name)) + { + NamesAndTypesList nested_columns; + for (const auto & name_and_type : header.getNamesAndTypesList()) + { + if (name_and_type.name.starts_with(nested_table_name + ".")) + nested_columns.push_back(name_and_type); + } + auto nested_table_type = Nested::collect(nested_columns).front().type; + + auto orc_column_with_type = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromORCColumn( + orc_column_with_type.first, orc_column_with_type.second, nested_table_name, false, nested_table_type)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; + } + } + + if (!read_from_nested) + { + if (!allow_missing_columns) + throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; + else + { + column.name = header_column.name; + column.type = header_column.type; + column.column = header_column.column->cloneResized(num_rows); + columns_list.push_back(std::move(column.column)); + if (block_missing_values) + block_missing_values->setBits(column_i, num_rows); + continue; + } + } + } + else + { + auto orc_column_with_type = name_to_column_ptr[search_column_name]; + column = readColumnFromORCColumn( + orc_column_with_type.first, orc_column_with_type.second, header_column.name, false, header_column.type); + } + + if (null_as_default) + insertNullAsDefaultIfNeeded(column, header_column, column_i, block_missing_values); + + try + { + column.column = castColumn(column, header_column.type); + } + catch (Exception & e) + { + e.addMessage(fmt::format( + "while converting column {} from type {} to type {}", + backQuote(header_column.name), + column.type->getName(), + header_column.type->getName())); + throw; + } + + column.type = header_column.type; + columns_list.push_back(std::move(column.column)); + } + + res.setColumns(columns_list, num_rows); +} + +} + +#endif diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h new file mode 100644 index 00000000000..3326999f0aa --- /dev/null +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h @@ -0,0 +1,129 @@ +#pragma once +#include "config.h" + +#if USE_ORC +# include +# include +# include +# include +# include + +namespace DB +{ + +class ORCInputStream : public orc::InputStream +{ +public: + ORCInputStream(SeekableReadBuffer & in_, size_t file_size_); + + uint64_t getLength() const override; + uint64_t getNaturalReadSize() const override; + void read(void * buf, uint64_t length, uint64_t offset) override; + const std::string & getName() const override { return name; } + +protected: + SeekableReadBuffer & in; + size_t file_size; + 
std::string name = "ORCInputStream"; +}; + +class ORCInputStreamFromString : public ReadBufferFromOwnString, public ORCInputStream +{ +public: + template + ORCInputStreamFromString(S && s_, size_t file_size_) + : ReadBufferFromOwnString(std::forward(s_)), ORCInputStream(dynamic_cast(*this), file_size_) + { + } +}; + +std::unique_ptr asORCInputStream(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled); + +// Reads the whole file into a memory buffer, owned by the returned RandomAccessFile. +std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in, std::atomic & is_cancelled); + + +class ORCColumnToCHColumn; +class NativeORCBlockInputFormat : public IInputFormat +{ +public: + NativeORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_); + + String getName() const override { return "ORCBlockInputFormat"; } + + void resetParser() override; + + const BlockMissingValues & getMissingValues() const override; + + size_t getApproxBytesReadForChunk() const override { return approx_bytes_read_for_chunk; } + +protected: + Chunk generate() override; + + void onCancel() override { is_stopped = 1; } + +private: + void prepareFileReader(); + bool prepareStripeReader(); + + std::unique_ptr file_reader; + std::unique_ptr stripe_reader; + std::unique_ptr orc_column_to_ch_column; + std::unique_ptr batch; + + // indices of columns to read from ORC file + std::list include_indices; + + BlockMissingValues block_missing_values; + size_t approx_bytes_read_for_chunk = 0; + + const FormatSettings format_settings; + const std::unordered_set & skip_stripes; + + int total_stripes = 0; + int current_stripe = -1; + std::unique_ptr current_stripe_info; + + std::atomic is_stopped{0}; +}; + +class NativeORCSchemaReader : public ISchemaReader +{ +public: + NativeORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); + + NamesAndTypesList readSchema() override; + +private: + const FormatSettings format_settings; +}; + +class ORCColumnToCHColumn +{ +public: + using ORCColumnPtr = const orc::ColumnVectorBatch *; + using ORCTypePtr = const orc::Type *; + using ORCColumnWithType = std::pair; + using NameToColumnPtr = std::unordered_map; + + ORCColumnToCHColumn(const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false); + + void orcTableToCHChunk( + Chunk & res, + const orc::Type * schema, + const orc::ColumnVectorBatch * table, + size_t num_rows, + BlockMissingValues * block_missing_values = nullptr); + + void orcColumnsToCHChunk( + Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); + +private: + const Block & header; + /// If false, throw exception if some columns in header not exists in arrow table. 
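// A minimal sketch of what allow_missing_columns controls (mirroring the logic of
// orcColumnsToCHChunk above): a requested column that is absent from the file is either
// an error or is filled with default values and reported as missing. The std:: types are
// simplified stand-ins for the real Block/Chunk machinery.
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<int64_t> resolveColumn(
    const std::optional<std::vector<int64_t>> & from_file,   // the column as read from the ORC file, if present
    size_t num_rows,
    bool allow_missing_columns,
    const std::string & name)
{
    if (from_file)
        return *from_file;
    if (!allow_missing_columns)
        throw std::runtime_error("Column '" + name + "' is not presented in input data.");
    return std::vector<int64_t>(num_rows, 0);   // cloneResized-style default fill
}
// The flags below configure exactly this behaviour.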
+ bool allow_missing_columns; + bool null_as_default; + bool case_insensitive_matching; +}; +} +#endif diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index 43294355f2f..7c0428834e0 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -3,9 +3,6 @@ #include #include #include - - -#include #include @@ -19,7 +16,7 @@ ODBCDriver2BlockOutputFormat::ODBCDriver2BlockOutputFormat( static void writeODBCString(WriteBuffer & out, const std::string & str) { - writeIntBinary(Int32(str.size()), out); + writeBinaryLittleEndian(Int32(str.size()), out); out.write(str.data(), str.size()); } @@ -33,7 +30,7 @@ void ODBCDriver2BlockOutputFormat::writeRow(const Columns & columns, size_t row_ if (column->isNullAt(row_idx)) { - writeIntBinary(Int32(-1), out); + writeBinaryLittleEndian(Int32(-1), out); } else { @@ -72,11 +69,11 @@ void ODBCDriver2BlockOutputFormat::writePrefix() const size_t columns = header.columns(); /// Number of header rows. - writeIntBinary(Int32(2), out); + writeBinaryLittleEndian(Int32(2), out); /// Names of columns. /// Number of columns + 1 for first name column. - writeIntBinary(Int32(columns + 1), out); + writeBinaryLittleEndian(Int32(columns + 1), out); writeODBCString(out, "name"); for (size_t i = 0; i < columns; ++i) { @@ -85,7 +82,7 @@ void ODBCDriver2BlockOutputFormat::writePrefix() } /// Types of columns. - writeIntBinary(Int32(columns + 1), out); + writeBinaryLittleEndian(Int32(columns + 1), out); writeODBCString(out, "type"); for (size_t i = 0; i < columns; ++i) { diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 016f07731d5..c0ac0e2c232 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,16 +1,17 @@ #include "ORCBlockInputFormat.h" -#include -#if USE_ORC -#include -#include -#include -#include -#include -#include "ArrowBufferedStreams.h" -#include "ArrowColumnToCHColumn.h" -#include "ArrowFieldIndexUtil.h" -#include +#if USE_ORC +# include +# include +# include +# include +# include +# include +# include +# include "ArrowBufferedStreams.h" +# include "ArrowColumnToCHColumn.h" +# include "ArrowFieldIndexUtil.h" +# include "NativeORCBlockInputFormat.h" namespace DB { @@ -42,6 +43,9 @@ Chunk ORCBlockInputFormat::generate() if (stripe_current >= stripe_total) return {}; + if (need_only_count) + return getChunkForCount(file_reader->GetRawORCReader()->getStripe(stripe_current++)->getNumberOfRows()); + auto batch_result = file_reader->ReadStripe(stripe_current, include_indices); if (!batch_result.ok()) throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to create batch reader: {}", batch_result.status().ToString()); @@ -125,16 +129,12 @@ void ORCBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "ORC", - format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - std::unordered_set nested_table_names; - if (format_settings.orc.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); - + std::unordered_set nested_table_names = 
Nested::getAllTableNames(getPort().getHeader(), ignore_case); for (int i = 0; i < schema->num_fields(); ++i) { const auto & name = schema->field(i)->name(); @@ -148,30 +148,45 @@ ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format { } -NamesAndTypesList ORCSchemaReader::readSchema() +void ORCSchemaReader::initializeIfNeeded() { - std::unique_ptr file_reader; - std::shared_ptr schema; + if (file_reader) + return; + std::atomic is_stopped = 0; getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped); +} + +NamesAndTypesList ORCSchemaReader::readSchema() +{ + initializeIfNeeded(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); if (format_settings.schema_inference_make_columns_nullable) return getNamesAndRecursivelyNullableTypes(header); - return header.getNamesAndTypesList();} + return header.getNamesAndTypesList(); +} + +std::optional ORCSchemaReader::readNumberOrRows() +{ + initializeIfNeeded(); + return file_reader->NumberOfRows(); +} void registerInputFormatORC(FormatFactory & factory) { factory.registerInputFormat( - "ORC", - [](ReadBuffer &buf, - const Block &sample, - const RowInputFormatParams &, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, settings); - }); - factory.markFormatSupportsSubcolumns("ORC"); + "ORC", + [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings) + { + InputFormatPtr res; + if (settings.orc.use_fast_decoder) + res = std::make_shared(buf, sample, settings); + else + res = std::make_shared(buf, sample, settings); + + return res; + }); factory.markFormatSupportsSubsetOfColumns("ORC"); } @@ -181,7 +196,13 @@ void registerORCSchemaReader(FormatFactory & factory) "ORC", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings); + SchemaReaderPtr res; + if (settings.orc.use_fast_decoder) + res = std::make_shared(buf, settings); + else + res = std::make_shared(buf, settings); + + return res; } ); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 98561e72e61..4d878f85255 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -52,7 +52,7 @@ private: std::vector include_indices; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; const FormatSettings format_settings; const std::unordered_set & skip_stripes; @@ -69,8 +69,13 @@ public: ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); NamesAndTypesList readSchema() override; + std::optional readNumberOrRows() override; private: + void initializeIfNeeded(); + + std::unique_ptr file_reader; + std::shared_ptr schema; const FormatSettings format_settings; }; diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 9ef6ba51065..827752d9db0 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -432,7 +432,7 @@ void ORCBlockOutputFormat::writeColumn( [scale](UInt64 value){ return (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); }); break; } - case TypeIndex::Decimal32:; + case TypeIndex::Decimal32: { writeDecimals( orc_column, diff --git 
a/src/Processors/Formats/Impl/OneFormat.cpp b/src/Processors/Formats/Impl/OneFormat.cpp new file mode 100644 index 00000000000..4a9c8caebf3 --- /dev/null +++ b/src/Processors/Formats/Impl/OneFormat.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +OneInputFormat::OneInputFormat(const Block & header, ReadBuffer & in_) : IInputFormat(header, &in_) +{ + if (header.columns() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "One input format is only suitable for tables with a single column of type UInt8 but the number of columns is {}", + header.columns()); + + if (!WhichDataType(header.getByPosition(0).type).isUInt8()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "One input format is only suitable for tables with a single column of type String but the column type is {}", + header.getByPosition(0).type->getName()); +} + +Chunk OneInputFormat::generate() +{ + if (done) + return {}; + + done = true; + auto column = ColumnUInt8::create(); + column->insertDefault(); + return Chunk(Columns{std::move(column)}, 1); +} + +void registerInputFormatOne(FormatFactory & factory) +{ + factory.registerInputFormat("One", []( + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams &, + const FormatSettings &) + { + return std::make_shared(sample, buf); + }); +} + +void registerOneSchemaReader(FormatFactory & factory) +{ + factory.registerExternalSchemaReader("One", [](const FormatSettings &) + { + return std::make_shared(); + }); +} + +} diff --git a/src/Processors/Formats/Impl/OneFormat.h b/src/Processors/Formats/Impl/OneFormat.h new file mode 100644 index 00000000000..f73b2dab66a --- /dev/null +++ b/src/Processors/Formats/Impl/OneFormat.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class OneInputFormat final : public IInputFormat +{ +public: + OneInputFormat(const Block & header, ReadBuffer & in_); + + String getName() const override { return "One"; } + +protected: + Chunk generate() override; + +private: + bool done = false; +}; + +class OneSchemaReader: public IExternalSchemaReader +{ +public: + NamesAndTypesList readSchema() override + { + return {{"dummy", std::make_shared()}}; + } +}; + +} diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index 4495680f5b2..f61dc3fbc78 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -202,7 +202,7 @@ private: const size_t max_block_size; BlockMissingValues last_block_missing_values; - size_t last_approx_bytes_read_for_chunk; + size_t last_approx_bytes_read_for_chunk = 0; /// Non-atomic because it is used in one thread. std::optional next_block_in_current_unit; diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp new file mode 100644 index 00000000000..9b51ca0c295 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -0,0 +1,640 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/// This file deals with schema conversion and with repetition and definition levels. + +/// Schema conversion is pretty straightforward. 
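// A worked example of the repetition/definition levels explained in the comment block
// below, for a hypothetical Array(Nullable(Int64)) column with rows [1, NULL, 2], [], [3].
// The encode helper and the RepDef struct are illustrative stand-ins that only mirror the
// scheme used by this file (Nullable adds one definition level, Array adds one definition
// and one repetition level, so max_def = 2 and max_rep = 1); they are not the real
// ColumnChunkWriteState API.
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

struct RepDef { std::vector<uint8_t> def, rep; std::vector<int64_t> values; };

// Encode one Array(Nullable(Int64)) column given per-row lists of (value, is_null) pairs.
RepDef encodeArrayOfNullable(const std::vector<std::vector<std::pair<int64_t, bool>>> & rows)
{
    RepDef out;
    for (const auto & row : rows)
    {
        if (row.empty())
        {
            out.def.push_back(0);   // empty array: nothing below the array level is defined
            out.rep.push_back(0);   // every row starts at repetition level 0
            continue;
        }
        for (size_t i = 0; i < row.size(); ++i)
        {
            out.rep.push_back(i == 0 ? 0 : 1);          // 1 means "continues the current array"
            out.def.push_back(row[i].second ? 1 : 2);   // 1 = null element, 2 = value present
            if (!row[i].second)
                out.values.push_back(row[i].first);     // only entries with def == max_def carry a value
        }
    }
    return out;
}

int main()
{
    auto r = encodeArrayOfNullable({{{1, false}, {0, true}, {2, false}}, {}, {{3, false}}});
    assert((r.def == std::vector<uint8_t>{2, 1, 2, 0, 2}));
    assert((r.rep == std::vector<uint8_t>{0, 1, 1, 0, 0}));
    assert((r.values == std::vector<int64_t>{1, 2, 3}));
}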
+ +/// "Repetition and definition levels" are a somewhat tricky way of encoding information about +/// optional fields and lists. +/// +/// If you don't want to learn how these work, feel free to skip the updateRepDefLevels* functions. +/// All you need to know is: +/// * values for nulls are not encoded, so we have to filter nullable columns, +/// * information about all array lengths and nulls is encoded in the arrays `def` and `rep`, +/// which need to be encoded next to the data, +/// * `def` and `rep` arrays can be longer than `primitive_column`, because they include nulls and +/// empty arrays; the values in primitive_column correspond to positions where def[i] == max_def. +/// +/// If you do want to learn it, dremel paper: https://research.google/pubs/pub36632/ +/// Instead of reading the whole paper, try staring at figures 2-3 for a while - it might be enough. +/// (Why does Parquet do all this instead of just storing array lengths and null masks? I'm not +/// really sure.) +/// +/// We calculate the levels recursively, from inner to outer columns. +/// This means scanning the whole array for each Array/Nullable nesting level, which is probably not +/// the most efficient way to do it. But there's usually at most one nesting level, so it's fine. +/// +/// Most of this is moot because ClickHouse doesn't support nullable arrays or tuples right now, so +/// almost none of the tricky cases can happen. We implement it in full generality anyway (mostly +/// because I only learned the previous sentence after writing most of the code). + + +namespace DB::ErrorCodes +{ + extern const int UNKNOWN_TYPE; + extern const int TOO_DEEP_RECURSION; // I'm 14 and this is deep + extern const int UNKNOWN_COMPRESSION_METHOD; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +/// Thrift structs that Parquet uses for various metadata inside the parquet file. +namespace parq = parquet::format; + +namespace +{ + +void assertNoDefOverflow(ColumnChunkWriteState & s) +{ + if (s.max_def == UINT8_MAX) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, + "Column has more than 255 levels of nested Array/Nullable. Impressive! Unfortunately, " + "this is not supported by this Parquet encoder (but is supported by Parquet, if you " + "really need this for some reason)."); +} + +void updateRepDefLevelsAndFilterColumnForNullable(ColumnChunkWriteState & s, const NullMap & null_map) +{ + /// Increment definition levels for non-nulls. + /// Filter the column to contain only non-null values. + + assertNoDefOverflow(s); + ++s.max_def; + + /// Normal case: no arrays or nullables inside this nullable. + if (s.max_def == 1) + { + chassert(s.def.empty()); + s.def.resize(null_map.size()); + for (size_t i = 0; i < s.def.size(); ++i) + s.def[i] = !null_map[i]; + + /// We could be more efficient with this: + /// * Instead of doing the filter() here, we could defer it to writeColumnChunkBody(), at + /// least in the simple case of Nullable(Primitive). Then it'll parallelize if the table + /// consists of one big tuple. + /// * Instead of filtering explicitly, we could build filtering into the data encoder. + /// * Instead of filling out the `def` values above, we could point to null_map and build + /// the '!' into the encoder. + /// None of these seem worth the complexity right now. + s.primitive_column = s.primitive_column->filter(s.def, /*result_size_hint*/ -1); + + return; + } + + /// Weird general case: Nullable(Array), Nullable(Nullable), or any arbitrary nesting like that. 
+ /// This is currently not allowed in ClickHouse, but let's support it anyway just in case. + + IColumn::Filter filter; + size_t row_idx = static_cast(-1); + for (size_t i = 0; i < s.def.size(); ++i) + { + row_idx += s.max_rep == 0 || s.rep[i] == 0; + if (s.def[i] == s.max_def - 1) + filter.push_back(!null_map[row_idx]); + s.def[i] += !null_map[row_idx]; + } + s.primitive_column = s.primitive_column->filter(filter, /*result_size_hint*/ -1); +} + +void updateRepDefLevelsForArray(ColumnChunkWriteState & s, const IColumn::Offsets & offsets) +{ + /// Increment all definition levels. + /// For non-first elements of arrays, increment repetition levels. + /// For empty arrays, insert a zero into repetition and definition levels arrays. + + assertNoDefOverflow(s); + ++s.max_def; + ++s.max_rep; + + /// Common case: no arrays or nullables inside this array. + if (s.max_rep == 1 && s.max_def == 1) + { + s.def.resize_fill(s.primitive_column->size(), 1); + s.rep.resize_fill(s.primitive_column->size(), 1); + size_t i = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + s.rep[i] = 0; + i += n; + } + else + { + s.def.push_back(1); + s.rep.push_back(1); + s.def[i] = 0; + s.rep[i] = 0; + i += 1; + } + } + return; + } + + /// General case: Array(Array), Array(Nullable), or any arbitrary nesting like that. + + for (auto & x : s.def) + ++x; + + if (s.max_rep == 1) + s.rep.resize_fill(s.def.size(), 1); + else + for (auto & x : s.rep) + ++x; + + PaddedPODArray mask(s.def.size(), 1); // for inserting zeroes to rep and def + size_t i = 0; // in the input (s.def/s.rep) + size_t empty_arrays = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + /// Un-increment the first rep of the array. + /// Skip n "items" in the nested column; first element of each item has rep = 1 + /// (we incremented it above). + chassert(s.rep[i] == 1); + --s.rep[i]; + do + { + ++i; + if (i == s.rep.size()) + { + --n; + chassert(n == 0); + break; + } + n -= s.rep[i] == 1; + } while (n); + } + else + { + mask.push_back(1); + mask[i + empty_arrays] = 0; + ++empty_arrays; + } + } + + if (empty_arrays != 0) + { + expandDataByMask(s.def, mask, false); + expandDataByMask(s.rep, mask, false); + } +} + +parq::CompressionCodec::type compressionMethodToParquet(CompressionMethod c) +{ + switch (c) + { + case CompressionMethod::None: return parq::CompressionCodec::UNCOMPRESSED; + case CompressionMethod::Snappy: return parq::CompressionCodec::SNAPPY; + case CompressionMethod::Gzip: return parq::CompressionCodec::GZIP; + case CompressionMethod::Brotli: return parq::CompressionCodec::BROTLI; + case CompressionMethod::Lz4: return parq::CompressionCodec::LZ4_RAW; + case CompressionMethod::Zstd: return parq::CompressionCodec::ZSTD; + + default: + throw Exception(ErrorCodes::UNKNOWN_COMPRESSION_METHOD, "Compression method {} is not supported by Parquet", toContentEncodingName(c)); + } +} + +/// Depth-first traversal of the schema tree for this column. +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas); + +void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::string & name, + const WriteOptions & options, ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + /// Add physical column info. 
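// A small standard-library sketch of the expand-by-mask step used above for empty arrays:
// given values for the kept positions and a 0/1 mask over the expanded length, produce an
// array of mask.size() where masked-out positions receive a default value (here 0). The
// real helper is ClickHouse's expandDataByMask; this stand-in only illustrates the idea.
#include <cstdint>
#include <vector>

std::vector<uint8_t> expandByMask(const std::vector<uint8_t> & values, const std::vector<uint8_t> & mask)
{
    std::vector<uint8_t> result(mask.size(), 0);
    size_t src = 0;
    for (size_t i = 0; i < mask.size(); ++i)
        if (mask[i])
            result[i] = values[src++];   // positions with mask == 1 keep their original value
    return result;
}
// preparePrimitiveColumn continues below by filling in the physical column info.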
+ auto & state = states.emplace_back(); + state.primitive_column = column; + state.compression = options.compression; + + state.column_chunk.__isset.meta_data = true; + state.column_chunk.meta_data.__set_path_in_schema({name}); + state.column_chunk.meta_data.__set_codec(compressionMethodToParquet(state.compression)); + + /// Add logical schema leaf. + auto & schema = schemas.emplace_back(); + schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + schema.__set_name(name); + + /// Convert the type enums. + + using T = parq::Type; + using C = parq::ConvertedType; + + auto types = [&](T::type type_, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(type_); + schema.__set_type(type_); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto int_type = [](Int8 bits, bool signed_) + { + parq::LogicalType t; + t.__isset.INTEGER = true; + t.INTEGER.__set_bitWidth(bits); + t.INTEGER.__set_isSigned(signed_); + return t; + }; + + auto fixed_string = [&](size_t size, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(static_cast(size)); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto decimal = [&](Int32 bytes, UInt32 precision, UInt32 scale) + { + /// Currently we encode all decimals as byte arrays, even though Decimal32 and Decimal64 + /// could be INT32 and INT64 instead. There doesn't seem to be much difference. + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(bytes); + schema.__set_scale(static_cast(scale)); + schema.__set_precision(static_cast(precision)); + schema.__set_converted_type(parq::ConvertedType::DECIMAL); + parq::DecimalType d; + d.__set_scale(static_cast(scale)); + d.__set_precision(static_cast(precision)); + parq::LogicalType t; + t.__set_DECIMAL(d); + schema.__set_logicalType(t); + }; + + switch (type->getTypeId()) + { + case TypeIndex::UInt8: + if (isBool(type)) + { + types(T::BOOLEAN); + state.is_bool = true; + } + else + { + types(T::INT32, C::UINT_8, int_type(8, false)); + } + break; + case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; + case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; + case TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; + case TypeIndex::Int8: types(T::INT32, C::INT_8, int_type(8, true)); break; + case TypeIndex::Int16: types(T::INT32, C::INT_16, int_type(16, true)); break; + case TypeIndex::Int32: types(T::INT32); break; + case TypeIndex::Int64: types(T::INT64); break; + case TypeIndex::Float32: types(T::FLOAT); break; + case TypeIndex::Float64: types(T::DOUBLE); break; + + /// These don't have suitable parquet logical types, so we write them as plain numbers. + /// (Parquet has "enums" but they're just strings, with nowhere to declare all possible enum + /// values in advance as part of the data type.) 
+ case TypeIndex::Enum8: types(T::INT32, C::INT_8, int_type(8, true)); break; // Int8 + case TypeIndex::Enum16: types(T::INT32, C::INT_16, int_type(16, true)); break; // Int16 + case TypeIndex::IPv4: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + case TypeIndex::Date: types(T::INT32, C::UINT_16, int_type(16, false)); break; // UInt16 + case TypeIndex::DateTime: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + + case TypeIndex::Date32: + { + parq::LogicalType t; + t.__set_DATE({}); + types(T::INT32, C::DATE, t); + break; + } + + case TypeIndex::DateTime64: + { + parq::ConvertedType::type converted; + parq::TimeUnit unit; + const auto & dt = assert_cast(*type); + UInt32 scale = dt.getScale(); + UInt32 converted_scale; + if (scale <= 3) + { + converted = parq::ConvertedType::TIMESTAMP_MILLIS; + unit.__set_MILLIS({}); + converted_scale = 3; + } + else if (scale <= 6) + { + converted = parq::ConvertedType::TIMESTAMP_MICROS; + unit.__set_MICROS({}); + converted_scale = 6; + } + else if (scale <= 9) + { + unit.__set_NANOS({}); + converted_scale = 9; + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected DateTime64 scale: {}", scale); + } + + parq::TimestampType tt; + /// (Shouldn't we check the DateTime64's timezone parameter here? No, the actual number + /// in DateTime64 column is always in UTC, regardless of the timezone parameter.) + tt.__set_isAdjustedToUTC(true); + tt.__set_unit(unit); + parq::LogicalType t; + t.__set_TIMESTAMP(tt); + types(T::INT64, converted, t); + state.datetime64_multiplier = DataTypeDateTime64::getScaleMultiplier(converted_scale - scale); + break; + } + + case TypeIndex::String: + case TypeIndex::FixedString: + { + if (options.output_fixed_string_as_fixed_byte_array && + type->getTypeId() == TypeIndex::FixedString) + { + fixed_string(assert_cast(*type).getN()); + } + else if (options.output_string_as_string) + { + parq::LogicalType t; + t.__set_STRING({}); + types(T::BYTE_ARRAY, C::UTF8, t); + } + else + { + types(T::BYTE_ARRAY); + } + break; + } + + /// Parquet doesn't have logical types for these. 
+ case TypeIndex::UInt128: fixed_string(16); break; + case TypeIndex::UInt256: fixed_string(32); break; + case TypeIndex::Int128: fixed_string(16); break; + case TypeIndex::Int256: fixed_string(32); break; + case TypeIndex::IPv6: fixed_string(16); break; + + case TypeIndex::Decimal32: decimal(4, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal64: decimal(8, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal128: decimal(16, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal256: decimal(32, getDecimalPrecision(*type), getDecimalScale(*type)); break; + + default: + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of column '{}' is not supported for conversion into Parquet data format.", type->getFamilyName(), name); + } +} + +void prepareColumnNullable( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const ColumnNullable * column_nullable = assert_cast(column.get()); + ColumnPtr nested_column = column_nullable->getNestedColumnPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const NullMap & null_map = column_nullable->getNullMapData(); + + size_t child_states_begin = states.size(); + size_t child_schema_idx = schemas.size(); + + prepareColumnRecursive(nested_column, nested_type, name, options, states, schemas); + + if (schemas[child_schema_idx].repetition_type == parq::FieldRepetitionType::REQUIRED) + { + /// Normal case: we just slap a FieldRepetitionType::OPTIONAL onto the nested column. + schemas[child_schema_idx].repetition_type = parq::FieldRepetitionType::OPTIONAL; + } + else + { + /// Weird case: Nullable(Nullable(...)). Or Nullable(Tuple(Nullable(...))), etc. + /// This is probably not allowed in ClickHouse, but let's support it just in case. + auto & schema = *schemas.insert(schemas.begin() + child_schema_idx, {}); + schema.__set_repetition_type(parq::FieldRepetitionType::OPTIONAL); + schema.__set_name("nullable"); + schema.__set_num_children(1); + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), schema.name + "."); + } + } + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + auto & s = states[i]; + updateRepDefLevelsAndFilterColumnForNullable(s, null_map); + } +} + +void prepareColumnTuple( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_tuple = assert_cast(column.get()); + const auto * type_tuple = assert_cast(type.get()); + + auto & tuple_schema = schemas.emplace_back(); + tuple_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + tuple_schema.__set_name(name); + tuple_schema.__set_num_children(static_cast(type_tuple->getElements().size())); + + size_t child_states_begin = states.size(); + + for (size_t i = 0; i < type_tuple->getElements().size(); ++i) + prepareColumnRecursive(column_tuple->getColumnPtr(i), type_tuple->getElement(i), type_tuple->getNameByPosition(i + 1), options, states, schemas); + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + /// O(nesting_depth^2), but who cares. 
+ path.insert(path.begin(), name); + } +} + +void prepareColumnArray( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_array = assert_cast(column.get()); + ColumnPtr nested_column = column_array->getDataPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const auto & offsets = column_array->getOffsets(); + + /// Schema for lists https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists + /// + /// required group `name` (List): + /// repeated group "list": + /// "element" + + /// Add the groups schema. + + schemas.emplace_back(); + schemas.emplace_back(); + auto & list_schema = schemas[schemas.size() - 2]; + auto & item_schema = schemas[schemas.size() - 1]; + + list_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + list_schema.__set_name(name); + list_schema.__set_num_children(1); + list_schema.__set_converted_type(parq::ConvertedType::LIST); + list_schema.__isset.logicalType = true; + list_schema.logicalType.__set_LIST({}); + + item_schema.__set_repetition_type(parq::FieldRepetitionType::REPEATED); + item_schema.__set_name("list"); + item_schema.__set_num_children(1); + + std::array path_prefix = {list_schema.name, item_schema.name}; + size_t child_states_begin = states.size(); + + /// Recurse. + prepareColumnRecursive(nested_column, nested_type, "element", options, states, schemas); + + /// Update repetition+definition levels and fully-qualified column names (x -> myarray.list.x). + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), path_prefix.begin(), path_prefix.end()); + + updateRepDefLevelsForArray(states[i], offsets); + } +} + +void prepareColumnMap( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_map = assert_cast(column.get()); + const auto * column_array = &column_map->getNestedColumn(); + const auto & offsets = column_array->getOffsets(); + ColumnPtr column_tuple = column_array->getDataPtr(); + + const auto * map_type = assert_cast(type.get()); + DataTypePtr tuple_type = std::make_shared(map_type->getKeyValueTypes(), Strings{"key", "value"}); + + /// Map is an array of tuples + /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps + /// + /// required group `name` (Map): + /// repeated group "key_value": + /// reqiured <...> "key" + /// <...> "value" + + auto & map_schema = schemas.emplace_back(); + map_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + map_schema.__set_name(name); + map_schema.__set_num_children(1); + map_schema.__set_converted_type(parq::ConvertedType::MAP); + map_schema.__set_logicalType({}); + map_schema.logicalType.__set_MAP({}); + + size_t tuple_schema_idx = schemas.size(); + size_t child_states_begin = states.size(); + + prepareColumnTuple(column_tuple, tuple_type, "key_value", options, states, schemas); + + schemas[tuple_schema_idx].__set_repetition_type(parq::FieldRepetitionType::REPEATED); + schemas[tuple_schema_idx].__set_converted_type(parq::ConvertedType::MAP_KEY_VALUE); + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), name); + + updateRepDefLevelsForArray(states[i], 
offsets); + } +} + +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + switch (type->getTypeId()) + { + case TypeIndex::Nullable: prepareColumnNullable(column, type, name, options, states, schemas); break; + case TypeIndex::Array: prepareColumnArray(column, type, name, options, states, schemas); break; + case TypeIndex::Tuple: prepareColumnTuple(column, type, name, options, states, schemas); break; + case TypeIndex::Map: prepareColumnMap(column, type, name, options, states, schemas); break; + case TypeIndex::LowCardinality: + { + auto nested_type = assert_cast(*type).getDictionaryType(); + if (nested_type->isNullable()) + prepareColumnNullable( + column->convertToFullColumnIfLowCardinality(), nested_type, name, options, states, schemas); + else + /// Use nested data type, but keep ColumnLowCardinality. The encoder can deal with it. + preparePrimitiveColumn(column, nested_type, name, options, states, schemas); + break; + } + default: + preparePrimitiveColumn(column, type, name, options, states, schemas); + break; + } +} + +} + +SchemaElements convertSchema(const Block & sample, const WriteOptions & options) +{ + SchemaElements schema; + auto & root = schema.emplace_back(); + root.__set_name("schema"); + root.__set_num_children(static_cast(sample.columns())); + + for (const auto & c : sample) + prepareColumnForWrite(c.column, c.type, c.name, options, nullptr, &schema); + + return schema; +} + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema) +{ + if (column->empty() && out_columns_to_write != nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column passed to Parquet encoder"); + + ColumnChunkWriteStates states; + SchemaElements schemas; + prepareColumnRecursive(column, type, name, options, states, schemas); + + if (out_columns_to_write) + for (auto & s : states) + out_columns_to_write->push_back(std::move(s)); + if (out_schema) + out_schema->insert(out_schema->end(), schemas.begin(), schemas.end()); + + if (column->empty()) + states.clear(); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp new file mode 100644 index 00000000000..2a99b028ae0 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB::Parquet +{ + +class WriteBufferTransport : public apache::thrift::transport::TTransport +{ +public: + WriteBuffer & out; + size_t bytes = 0; + + explicit WriteBufferTransport(WriteBuffer & out_) : out(out_) {} + + void write(const uint8_t* buf, uint32_t len) + { + out.write(reinterpret_cast(buf), len); + bytes += len; + } +}; + +template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out) +{ + auto trans = std::make_shared(out); + auto proto = apache::thrift::protocol::TCompactProtocolFactoryT().getProtocol(trans); + obj.write(proto.get()); + return trans->bytes; +} + +template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.h 
b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h new file mode 100644 index 00000000000..1efbe0002d4 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h @@ -0,0 +1,17 @@ +#pragma once + +#include // in contrib/arrow/cpp/src/ , generated from parquet.thrift +#include + +namespace DB::Parquet +{ + +/// Returns number of bytes written. +template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out); + +extern template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp new file mode 100644 index 00000000000..d9cfd40e168 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -0,0 +1,940 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" +#include "Processors/Formats/Impl/Parquet/ThriftUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config_version.h" + +#if USE_SNAPPY +#include +#endif + +namespace DB::ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int LIMIT_EXCEEDED; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +namespace parq = parquet::format; + +namespace +{ + +template +struct StatisticsNumeric +{ + T min = std::numeric_limits::has_infinity + ? std::numeric_limits::infinity() : std::numeric_limits::max(); + T max = std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() : std::numeric_limits::lowest(); + + void add(SourceType x) + { + min = std::min(min, static_cast(x)); + max = std::max(max, static_cast(x)); + } + + void merge(const StatisticsNumeric & s) + { + min = std::min(min, s.min); + max = std::max(max, s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions &) + { + parq::Statistics s; + s.__isset.min_value = s.__isset.max_value = true; + s.min_value.resize(sizeof(T)); + s.max_value.resize(sizeof(T)); + memcpy(s.min_value.data(), &min, sizeof(T)); + memcpy(s.max_value.data(), &max, sizeof(T)); + + if constexpr (std::is_signed::value) + { + s.__set_min(s.min_value); + s.__set_max(s.max_value); + } + return s; + } +}; + +struct StatisticsFixedStringRef +{ + size_t fixed_string_size = UINT64_MAX; + const uint8_t * min = nullptr; + const uint8_t * max = nullptr; + + void add(parquet::FixedLenByteArray a) + { + chassert(fixed_string_size != UINT64_MAX); + addMin(a.ptr); + addMax(a.ptr); + } + + void merge(const StatisticsFixedStringRef & s) + { + chassert(fixed_string_size == UINT64_MAX || fixed_string_size == s.fixed_string_size); + fixed_string_size = s.fixed_string_size; + if (s.min == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { min = max = nullptr; } + + parq::Statistics get(const WriteOptions & options) const + { + parq::Statistics s; + if (min == nullptr || fixed_string_size > options.max_statistics_size) + return s; + s.__set_min_value(std::string(reinterpret_cast(min), fixed_string_size)); + s.__set_max_value(std::string(reinterpret_cast(max), fixed_string_size)); + return s; + } + + void addMin(const uint8_t * p) + { + if (min == nullptr || memcmp(p, min, fixed_string_size) < 0) + min = p; + } + void addMax(const uint8_t * p) + { + if (max == nullptr || memcmp(p, 
max, fixed_string_size) > 0) + max = p; + } +}; + +template +struct StatisticsFixedStringCopy +{ + bool empty = true; + std::array min {}; + std::array max {}; + + void add(parquet::FixedLenByteArray a) + { + addMin(a.ptr); + addMax(a.ptr); + empty = false; + } + + void merge(const StatisticsFixedStringCopy & s) + { + if (s.empty) + return; + addMin(&s.min[0]); + addMax(&s.max[0]); + empty = false; + } + + void clear() { empty = true; } + + parq::Statistics get(const WriteOptions &) const + { + parq::Statistics s; + if (empty) + return s; + s.__set_min_value(std::string(reinterpret_cast(min.data()), S)); + s.__set_max_value(std::string(reinterpret_cast(max.data()), S)); + return s; + } + + void addMin(const uint8_t * p) + { + if (empty || memcmp(p, min.data(), S) < 0) + memcpy(min.data(), p, S); + } + void addMax(const uint8_t * p) + { + if (empty || memcmp(p, max.data(), S) > 0) + memcpy(max.data(), p, S); + } +}; + +struct StatisticsStringRef +{ + parquet::ByteArray min; + parquet::ByteArray max; + + void add(parquet::ByteArray x) + { + addMin(x); + addMax(x); + } + + void merge(const StatisticsStringRef & s) + { + if (s.min.ptr == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions & options) const + { + parq::Statistics s; + if (min.ptr == nullptr) + return s; + if (static_cast(min.len) <= options.max_statistics_size) + s.__set_min_value(std::string(reinterpret_cast(min.ptr), static_cast(min.len))); + if (static_cast(max.len) <= options.max_statistics_size) + s.__set_max_value(std::string(reinterpret_cast(max.ptr), static_cast(max.len))); + return s; + } + + void addMin(parquet::ByteArray x) + { + if (min.ptr == nullptr || compare(x, min) < 0) + min = x; + } + + void addMax(parquet::ByteArray x) + { + if (max.ptr == nullptr || compare(x, max) > 0) + max = x; + } + + static int compare(parquet::ByteArray a, parquet::ByteArray b) + { + int t = memcmp(a.ptr, b.ptr, std::min(a.len, b.len)); + if (t != 0) + return t; + return a.len - b.len; + } +}; + +/// The column usually needs to be converted to one of Parquet physical types, e.g. UInt16 -> Int32 +/// or [element of ColumnString] -> std::string_view. +/// We do this conversion in small batches rather than all at once, just before encoding the batch, +/// in hopes of getting better performance through cache locality. +/// The Coverter* structs below are responsible for that. +/// When conversion is not needed, getBatch() will just return pointer into original data. 
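To make the batching idea above concrete, here is a minimal standalone sketch. It is not part of the patch: the struct name and the use of `std::vector` are invented for illustration (the real converters below operate on ClickHouse `IColumn` types), but it shows the same pattern of widening UInt16 into the Int32 that Parquet's INT32 physical type expects, one small batch at a time, reusing a scratch buffer.

```
#include <cstdint>
#include <vector>

/// Toy stand-in for the Converter* pattern described above; illustration only.
struct ToyUInt16ToInt32Converter
{
    const std::vector<uint16_t> & column; // source column; in ClickHouse this is a ColumnVector
    std::vector<int32_t> buf;             // scratch buffer reused across batches

    /// Convert `count` values starting at `offset`, return a pointer to the converted batch.
    const int32_t * getBatch(size_t offset, size_t count)
    {
        buf.resize(count);
        for (size_t i = 0; i < count; ++i)
            buf[i] = static_cast<int32_t>(column[offset + i]); // widening cast, no overflow possible
        return buf.data();
    }
};
```

When the source and destination types have the same width (e.g. Int32 written as INT32), the real converters skip the copy entirely and return a pointer straight into the column's data, as the comment above notes.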
+ +template ::value, + To, + typename std::make_unsigned::type>::type> +struct ConverterNumeric +{ + using Statistics = StatisticsNumeric; + + const Col & column; + PODArray buf; + + explicit ConverterNumeric(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const To * getBatch(size_t offset, size_t count) + { + if constexpr (sizeof(*column.getData().data()) == sizeof(To)) + return reinterpret_cast(column.getData().data() + offset); + else + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i] = static_cast(column.getData()[offset + i]); // NOLINT + return buf.data(); + } + } +}; + +struct ConverterDateTime64WithMultiplier +{ + using Statistics = StatisticsNumeric; + + using Col = ColumnDecimal; + const Col & column; + Int64 multiplier; + PODArray buf; + + ConverterDateTime64WithMultiplier(const ColumnPtr & c, Int64 multiplier_) : column(assert_cast(*c)), multiplier(multiplier_) {} + + const Int64 * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + /// Not checking overflow because DateTime64 values should already be in the range where + /// they fit in Int64 at any allowed scale (i.e. up to nanoseconds). + buf[i] = column.getData()[offset + i].value * multiplier; + return buf.data(); + } +}; + +struct ConverterString +{ + using Statistics = StatisticsStringRef; + + const ColumnString & column; + PODArray buf; + + explicit ConverterString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::ByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + { + StringRef s = column.getDataAt(offset + i); + buf[i] = parquet::ByteArray(static_cast(s.size), reinterpret_cast(s.data)); + } + return buf.data(); + } +}; + +struct ConverterFixedString +{ + using Statistics = StatisticsFixedStringRef; + + const ColumnFixedString & column; + PODArray buf; + + explicit ConverterFixedString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getChars().data() + (offset + i) * column.getN()); + return buf.data(); + } + + size_t fixedStringSize() { return column.getN(); } +}; + +struct ConverterFixedStringAsString +{ + using Statistics = StatisticsStringRef; + + const ColumnFixedString & column; + PODArray buf; + + explicit ConverterFixedStringAsString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::ByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i] = parquet::ByteArray(static_cast(column.getN()), reinterpret_cast(column.getChars().data() + (offset + i) * column.getN())); + return buf.data(); + } +}; + +template +struct ConverterNumberAsFixedString +{ + /// Calculate min/max statistics for little-endian fixed strings, not numbers, because parquet + /// doesn't know it's numbers. 
+ using Statistics = StatisticsFixedStringCopy; + + const ColumnVector & column; + PODArray buf; + + explicit ConverterNumberAsFixedString(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getData().data() + offset + i); + return buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Like ConverterNumberAsFixedString, but converts to big-endian. Because that's the byte order +/// Parquet uses for decimal types and literally nothing else, for some reason. +template +struct ConverterDecimal +{ + using Statistics = StatisticsFixedStringCopy; + + const ColumnDecimal & column; + PODArray data_buf; + PODArray ptr_buf; + + explicit ConverterDecimal(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + data_buf.resize(count * sizeof(T)); + ptr_buf.resize(count); + memcpy(data_buf.data(), reinterpret_cast(column.getData().data() + offset), count * sizeof(T)); + for (size_t i = 0; i < count; ++i) + { + std::reverse(data_buf.data() + i * sizeof(T), data_buf.data() + (i + 1) * sizeof(T)); + ptr_buf[i].ptr = data_buf.data() + i * sizeof(T); + } + return ptr_buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Returns either `source` or `scratch`. +PODArray & compress(PODArray & source, PODArray & scratch, CompressionMethod method) +{ + /// We could use wrapWriteBufferWithCompressionMethod() for everything, but I worry about the + /// overhead of creating a bunch of WriteBuffers on each page (thousands of values). + switch (method) + { + case CompressionMethod::None: + return source; + + case CompressionMethod::Lz4: + { + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wold-style-cast" + + size_t max_dest_size = LZ4_COMPRESSBOUND(source.size()); + + #pragma clang diagnostic pop + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + int compressed_size = LZ4_compress_default( + source.data(), + scratch.data(), + static_cast(source.size()), + static_cast(max_dest_size)); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } + +#if USE_SNAPPY + case CompressionMethod::Snappy: + { + size_t max_dest_size = snappy::MaxCompressedLength(source.size()); + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + size_t compressed_size; + snappy::RawCompress(source.data(), source.size(), scratch.data(), &compressed_size); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } +#endif + + default: + { + auto dest_buf = std::make_unique>>(scratch); + auto compressed_buf = wrapWriteBufferWithCompressionMethod( + std::move(dest_buf), + method, + /*level*/ 3, + source.size(), + /*existing_memory*/ source.data()); + chassert(compressed_buf->position() == source.data()); + chassert(compressed_buf->available() == source.size()); + compressed_buf->position() += source.size(); + compressed_buf->finalize(); + return scratch; + } + } +} + +void encodeRepDefLevelsRLE(const UInt8 * data, size_t size, UInt8 max_level, PODArray & out) +{ 
+ using arrow::util::RleEncoder; + + chassert(max_level > 0); + size_t offset = out.size(); + size_t prefix_size = sizeof(Int32); + + int bit_width = bitScanReverse(max_level) + 1; + int max_rle_size = RleEncoder::MaxBufferSize(bit_width, static_cast(size)) + + RleEncoder::MinBufferSize(bit_width); + + out.resize(offset + prefix_size + max_rle_size); + + RleEncoder encoder(reinterpret_cast(out.data() + offset + prefix_size), max_rle_size, bit_width); + for (size_t i = 0; i < size; ++i) + encoder.Put(data[i]); + encoder.Flush(); + Int32 len = encoder.len(); + + memcpy(out.data() + offset, &len, prefix_size); + out.resize(offset + prefix_size + len); +} + +void addToEncodingsUsed(ColumnChunkWriteState & s, parq::Encoding::type e) +{ + if (!std::count(s.column_chunk.meta_data.encodings.begin(), s.column_chunk.meta_data.encodings.end(), e)) + s.column_chunk.meta_data.encodings.push_back(e); +} + +void writePage(const parq::PageHeader & header, const PODArray & compressed, ColumnChunkWriteState & s, WriteBuffer & out) +{ + size_t header_size = serializeThriftStruct(header, out); + out.write(compressed.data(), compressed.size()); + + /// Remember first data page and first dictionary page. + if (header.__isset.data_page_header && s.column_chunk.meta_data.data_page_offset == -1) + s.column_chunk.meta_data.__set_data_page_offset(s.column_chunk.meta_data.total_compressed_size); + if (header.__isset.dictionary_page_header && !s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.__set_dictionary_page_offset(s.column_chunk.meta_data.total_compressed_size); + + s.column_chunk.meta_data.total_uncompressed_size += header.uncompressed_page_size + header_size; + s.column_chunk.meta_data.total_compressed_size += header.compressed_page_size + header_size; +} + +template +void writeColumnImpl( + ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out, Converter && converter) +{ + size_t num_values = s.max_def > 0 ? s.def.size() : s.primitive_column->size(); + auto encoding = options.encoding; + + typename Converter::Statistics page_statistics; + typename Converter::Statistics total_statistics; + + bool use_dictionary = options.use_dictionary_encoding && !s.is_bool; + + std::optional fixed_string_descr; + if constexpr (std::is_same::value) + { + /// This just communicates one number to MakeTypedEncoder(): the fixed string length. + fixed_string_descr.emplace(parquet::schema::PrimitiveNode::Make( + "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, + parquet::ConvertedType::NONE, static_cast(converter.fixedStringSize())), 0, 0); + + if constexpr (std::is_same::value) + page_statistics.fixed_string_size = converter.fixedStringSize(); + } + + /// Could use an arena here (by passing a custom MemoryPool), to reuse memory across pages. + /// Alternatively, we could avoid using arrow's dictionary encoding code and leverage + /// ColumnLowCardinality instead. It would work basically the same way as what this function + /// currently does: add values to the ColumnRowCardinality (instead of `encoder`) in batches, + /// checking dictionary size after each batch. That might be faster. + auto encoder = parquet::MakeTypedEncoder( + // ignored if using dictionary + static_cast(encoding), + use_dictionary, fixed_string_descr ? 
&*fixed_string_descr : nullptr); + + struct PageData + { + parq::PageHeader header; + PODArray data; + }; + std::vector dict_encoded_pages; // can't write them out until we have full dictionary + + /// Reused across pages to reduce number of allocations and improve locality. + PODArray encoded; + PODArray compressed_maybe; + + /// Start of current page. + size_t def_offset = 0; // index in def and rep + size_t data_offset = 0; // index in primitive_column + + auto flush_page = [&](size_t def_count, size_t data_count) + { + encoded.clear(); + + /// Concatenate encoded rep, def, and data. + + if (s.max_rep > 0) + encodeRepDefLevelsRLE(s.rep.data() + def_offset, def_count, s.max_rep, encoded); + if (s.max_def > 0) + encodeRepDefLevelsRLE(s.def.data() + def_offset, def_count, s.max_def, encoded); + + std::shared_ptr values = encoder->FlushValues(); // resets it for next page + + encoded.resize(encoded.size() + values->size()); + memcpy(encoded.data() + encoded.size() - values->size(), values->data(), values->size()); + values.reset(); + + if (encoded.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Uncompressed page is too big: {}", encoded.size()); + + size_t uncompressed_size = encoded.size(); + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DATA_PAGE); + header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.data_page_header = true; + auto & d = header.data_page_header; + d.__set_num_values(static_cast(def_count)); + d.__set_encoding(use_dictionary ? 
parq::Encoding::RLE_DICTIONARY : encoding); + d.__set_definition_level_encoding(parq::Encoding::RLE); + d.__set_repetition_level_encoding(parq::Encoding::RLE); + /// We could also put checksum in `header.crc`, but apparently no one uses it: + /// https://issues.apache.org/jira/browse/PARQUET-594 + + if (options.write_page_statistics) + { + d.__set_statistics(page_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + d.statistics.__set_null_count(static_cast(def_count - data_count)); + } + + total_statistics.merge(page_statistics); + page_statistics.clear(); + + if (use_dictionary) + { + dict_encoded_pages.push_back({.header = std::move(header), .data = {}}); + std::swap(dict_encoded_pages.back().data, compressed); + } + else + { + writePage(header, compressed, s, out); + } + + def_offset += def_count; + data_offset += data_count; + }; + + auto flush_dict = [&] -> bool + { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = dict_encoder->dict_encoded_size(); + + encoded.resize(static_cast(dict_size)); + dict_encoder->WriteDict(reinterpret_cast(encoded.data())); + + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed dictionary page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DICTIONARY_PAGE); + header.__set_uncompressed_page_size(dict_size); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.dictionary_page_header = true; + header.dictionary_page_header.__set_num_values(dict_encoder->num_entries()); + header.dictionary_page_header.__set_encoding(parq::Encoding::PLAIN); + + writePage(header, compressed, s, out); + + for (auto & p : dict_encoded_pages) + writePage(p.header, p.data, s, out); + + dict_encoded_pages.clear(); + encoder.reset(); + + return true; + }; + + auto is_dict_too_big = [&] { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = dict_encoder->dict_encoded_size(); + return static_cast(dict_size) >= options.dictionary_size_limit; + }; + + while (def_offset < num_values) + { + /// Pick enough data for a page. + size_t next_def_offset = def_offset; + size_t next_data_offset = data_offset; + while (true) + { + /// Bite off a batch of defs and corresponding data values. + size_t def_count = std::min(options.write_batch_size, num_values - next_def_offset); + size_t data_count = 0; + if (s.max_def == 0) + data_count = def_count; + else + for (size_t i = 0; i < def_count; ++i) + data_count += s.def[next_def_offset + i] == s.max_def; + + /// Encode the data (but not the levels yet), so that we can estimate its encoded size. + const typename ParquetDType::c_type * converted = converter.getBatch(next_data_offset, data_count); + + if (options.write_page_statistics || options.write_column_chunk_statistics) +/// Workaround for clang bug: https://github.com/llvm/llvm-project/issues/63630 +#ifdef MEMORY_SANITIZER +#pragma clang loop vectorize(disable) +#endif + for (size_t i = 0; i < data_count; ++i) + page_statistics.add(converted[i]); + + encoder->Put(converted, static_cast(data_count)); + + next_def_offset += def_count; + next_data_offset += data_count; + + if (use_dictionary && is_dict_too_big()) + { + /// Fallback to non-dictionary encoding. + /// + /// Discard encoded data and start over. 
+ /// This is different from what arrow does: arrow writes out the dictionary-encoded + /// data, then uses non-dictionary encoding for later pages. + /// Starting over seems better: it produces slightly smaller files (I saw 1-4%) in + /// exchange for slight decrease in speed (I saw < 5%). This seems like a good + /// trade because encoding speed is much less important than decoding (as evidenced + /// by arrow not supporting parallel encoding, even though it's easy to support). + + def_offset = 0; + data_offset = 0; + dict_encoded_pages.clear(); + use_dictionary = false; + +#ifndef NDEBUG + /// Arrow's DictEncoderImpl destructor asserts that FlushValues() was called, so we + /// call it even though we don't need its output. + encoder->FlushValues(); +#endif + + encoder = parquet::MakeTypedEncoder( + static_cast(encoding), /* use_dictionary */ false, + fixed_string_descr ? &*fixed_string_descr : nullptr); + break; + } + + if (next_def_offset == num_values || + static_cast(encoder->EstimatedDataEncodedSize()) >= options.data_page_size) + { + flush_page(next_def_offset - def_offset, next_data_offset - data_offset); + break; + } + } + } + + if (use_dictionary) + flush_dict(); + + chassert(data_offset == s.primitive_column->size()); + + if (options.write_column_chunk_statistics) + { + s.column_chunk.meta_data.__set_statistics(total_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + s.column_chunk.meta_data.statistics.__set_null_count(static_cast(def_offset - data_offset)); + } + + /// Report which encodings we've used. + if (s.max_rep > 0 || s.max_def > 0) + addToEncodingsUsed(s, parq::Encoding::RLE); // levels + if (use_dictionary) + { + addToEncodingsUsed(s, parq::Encoding::PLAIN); // dictionary itself + addToEncodingsUsed(s, parq::Encoding::RLE_DICTIONARY); // ids + } + else + { + addToEncodingsUsed(s, encoding); + } +} + +} + +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out) +{ + s.column_chunk.meta_data.__set_num_values(s.max_def > 0 ? s.def.size() : s.primitive_column->size()); + + /// We'll be updating these as we go. + s.column_chunk.meta_data.__set_encodings({}); + s.column_chunk.meta_data.__set_total_compressed_size(0); + s.column_chunk.meta_data.__set_total_uncompressed_size(0); + s.column_chunk.meta_data.__set_data_page_offset(-1); + + s.primitive_column = s.primitive_column->convertToFullColumnIfLowCardinality(); + + switch (s.primitive_column->getDataType()) + { + /// Numeric conversion to Int32 or Int64. 
+ #define N(source_type, parquet_dtype) \ + writeColumnImpl(s, options, out, \ + ConverterNumeric, parquet::parquet_dtype::c_type>( \ + s.primitive_column)) + + case TypeIndex::UInt8: + if (s.is_bool) + writeColumnImpl(s, options, out, + ConverterNumeric, bool, bool>(s.primitive_column)); + else + N(UInt8, Int32Type); + break; + case TypeIndex::UInt16 : N(UInt16, Int32Type); break; + case TypeIndex::UInt32 : N(UInt32, Int32Type); break; + case TypeIndex::UInt64 : N(UInt64, Int64Type); break; + case TypeIndex::Int8 : N(Int8, Int32Type); break; + case TypeIndex::Int16 : N(Int16, Int32Type); break; + case TypeIndex::Int32 : N(Int32, Int32Type); break; + case TypeIndex::Int64 : N(Int64, Int64Type); break; + + case TypeIndex::Enum8: N(Int8, Int32Type); break; + case TypeIndex::Enum16: N(Int16, Int32Type); break; + case TypeIndex::Date: N(UInt16, Int32Type); break; + case TypeIndex::Date32: N(Int32, Int32Type); break; + case TypeIndex::DateTime: N(UInt32, Int32Type); break; + + #undef N + + case TypeIndex::Float32: + writeColumnImpl( + s, options, out, ConverterNumeric, Float32, Float32>( + s.primitive_column)); + break; + + case TypeIndex::Float64: + writeColumnImpl( + s, options, out, ConverterNumeric, Float64, Float64>( + s.primitive_column)); + break; + + case TypeIndex::DateTime64: + if (s.datetime64_multiplier == 1) + writeColumnImpl( + s, options, out, ConverterNumeric, Int64, Int64>( + s.primitive_column)); + else + writeColumnImpl( + s, options, out, ConverterDateTime64WithMultiplier( + s.primitive_column, s.datetime64_multiplier)); + break; + + case TypeIndex::IPv4: + writeColumnImpl( + s, options, out, ConverterNumeric, Int32, UInt32>( + s.primitive_column)); + break; + + case TypeIndex::String: + writeColumnImpl( + s, options, out, ConverterString(s.primitive_column)); + break; + + case TypeIndex::FixedString: + if (options.output_fixed_string_as_fixed_byte_array) + writeColumnImpl( + s, options, out, ConverterFixedString(s.primitive_column)); + else + writeColumnImpl( + s, options, out, ConverterFixedStringAsString(s.primitive_column)); + break; + + #define F(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterNumberAsFixedString(s.primitive_column)) + case TypeIndex::UInt128: F(UInt128); break; + case TypeIndex::UInt256: F(UInt256); break; + case TypeIndex::Int128: F(Int128); break; + case TypeIndex::Int256: F(Int256); break; + case TypeIndex::IPv6: F(IPv6); break; + #undef F + + #define D(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterDecimal(s.primitive_column)) + case TypeIndex::Decimal32: D(Decimal32); break; + case TypeIndex::Decimal64: D(Decimal64); break; + case TypeIndex::Decimal128: D(Decimal128); break; + case TypeIndex::Decimal256: D(Decimal256); break; + #undef D + + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column type: {}", s.primitive_column->getFamilyName()); + } + + /// Free some memory. + s.primitive_column = {}; + s.def = {}; + s.rep = {}; +} + +void writeFileHeader(WriteBuffer & out) +{ + /// Write the magic bytes. We're a wizard now. 
+ out.write("PAR1", 4); +} + +parq::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions &, WriteBuffer & out) +{ + if (s.column_chunk.meta_data.data_page_offset != -1) + s.column_chunk.meta_data.data_page_offset += offset_in_file; + if (s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.dictionary_page_offset += offset_in_file; + s.column_chunk.file_offset = offset_in_file + s.column_chunk.meta_data.total_compressed_size; + + serializeThriftStruct(s.column_chunk, out); + + return s.column_chunk; +} + +parq::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows) +{ + parq::RowGroup r; + r.__set_num_rows(num_rows); + r.__set_columns(column_chunks); + r.__set_total_compressed_size(0); + for (auto & c : r.columns) + { + r.total_byte_size += c.meta_data.total_uncompressed_size; + r.total_compressed_size += c.meta_data.total_compressed_size; + } + if (!r.columns.empty()) + { + auto & m = r.columns[0].meta_data; + r.__set_file_offset(m.__isset.dictionary_page_offset ? m.dictionary_page_offset : m.data_page_offset); + } + return r; +} + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out) +{ + parq::FileMetaData meta; + meta.version = 2; + meta.schema = std::move(schema); + meta.row_groups = std::move(row_groups); + for (auto & r : meta.row_groups) + meta.num_rows += r.num_rows; + meta.__set_created_by(VERSION_NAME " " VERSION_DESCRIBE); + + if (options.write_page_statistics || options.write_column_chunk_statistics) + { + meta.__set_column_orders({}); + for (auto & s : meta.schema) + if (!s.__isset.num_children) + meta.column_orders.emplace_back(); + for (auto & c : meta.column_orders) + c.__set_TYPE_ORDER({}); + } + + size_t footer_size = serializeThriftStruct(meta, out); + + if (footer_size > INT32_MAX) + throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Parquet file metadata too big: {}", footer_size); + + writeIntBinary(static_cast(footer_size), out); + out.write("PAR1", 4); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h new file mode 100644 index 00000000000..24733ac276b --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB::Parquet +{ + +/// A good resource for learning how Parquet format works is +/// contrib/arrow/cpp/src/parquet/parquet.thrift + +struct WriteOptions +{ + bool output_string_as_string = false; + bool output_fixed_string_as_fixed_byte_array = true; + + CompressionMethod compression = CompressionMethod::Lz4; + + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; + + bool use_dictionary_encoding = true; + size_t dictionary_size_limit = 1024 * 1024; + /// If using dictionary, this encoding is used as a fallback when dictionary gets too big. + /// Otherwise, this is used for everything. + parquet::format::Encoding::type encoding = parquet::format::Encoding::PLAIN; + + bool write_page_statistics = true; + bool write_column_chunk_statistics = true; + size_t max_statistics_size = 4096; +}; + +/// Information about a primitive column (leaf of the schema tree) to write to Parquet file. +struct ColumnChunkWriteState +{ + /// After writeColumnChunkBody(), offsets in this struct are relative to the start of column chunk. + /// Then finalizeColumnChunkAndWriteFooter() fixes them up before writing to file. 
+ parquet::format::ColumnChunk column_chunk; + + ColumnPtr primitive_column; + CompressionMethod compression; // must match what's inside column_chunk + Int64 datetime64_multiplier = 1; // for converting e.g. seconds to milliseconds + bool is_bool = false; // bool vs UInt8 have the same column type but are encoded differently + + /// Repetition and definition levels. Produced by prepareColumnForWrite(). + /// def is empty iff max_def == 0, which means no arrays or nullables. + /// rep is empty iff max_rep == 0, which means no arrays. + PaddedPODArray def; // definition levels + PaddedPODArray rep; // repetition levels + /// Max possible levels, according to schema. Actual max in def/rep may be smaller. + UInt8 max_def = 0; + UInt8 max_rep = 0; + + ColumnChunkWriteState() = default; + /// Prevent accidental copying. + ColumnChunkWriteState(ColumnChunkWriteState &&) = default; + ColumnChunkWriteState & operator=(ColumnChunkWriteState &&) = default; + + /// Estimated memory usage. + size_t allocatedBytes() const + { + size_t r = def.allocated_bytes() + rep.allocated_bytes(); + if (primitive_column) + r += primitive_column->allocatedBytes(); + return r; + } +}; + +using SchemaElements = std::vector; +using ColumnChunkWriteStates = std::vector; + +/// Parquet file consists of row groups, which consist of column chunks. +/// +/// Column chunks can be encoded mostly independently of each other, in parallel. +/// But there are two small complications: +/// 1. One ClickHouse column can translate to multiple leaf columns in parquet. +/// E.g. tuples and maps. +/// If all primitive columns are in one big tuple, we'd like to encode them in parallel too, +/// even though they're one top-level ClickHouse column. +/// 2. At the end of each encoded column chunk there's a footer (struct ColumnMetaData) that +/// contains some absolute offsets in the file. We can't encode it until we know the exact +/// position in the file where the column chunk will go. So these footers have to be serialized +/// sequentially, after we know sizes of all previous column chunks. +/// +/// With that in mind, here's how to write a parquet file: +/// +/// (1) writeFileHeader() +/// (2) For each row group: +/// | (3) For each ClickHouse column: +/// | (4) Call prepareColumnForWrite(). +/// | It'll produce one or more ColumnChunkWriteStates, corresponding to primitive columns that +/// | we need to write. +/// | It'll also produce SchemaElements as a byproduct, describing the logical types and +/// | groupings of the physical columns (e.g. tuples, arrays, maps). +/// | (5) For each ColumnChunkWriteState: +/// | (6) Call writeColumnChunkBody() to write the actual data to the given WriteBuffer. +/// | (7) Call finalizeColumnChunkAndWriteFooter() to write the footer of the column chunk. +/// | (8) Call makeRowGroup() using the ColumnChunk metadata structs from previous step. +/// (9) Call writeFileFooter() using the row groups from previous step and SchemaElements from +/// convertSchema(). +/// +/// Steps (4) and (6) can be parallelized, both within and across row groups. + +/// Parquet schema is a tree of SchemaElements, flattened into a list in depth-first order. +/// Leaf nodes correspond to physical columns of primitive types. Inner nodes describe logical +/// groupings of those columns, e.g. tuples or structs. 
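As a rough illustration of the call sequence in steps (1)-(9) above, here is a minimal single-threaded sketch that writes one Block as a single row group. It is not part of the patch: `writeSingleRowGroupFile` is a made-up helper, it ignores parallelism and the multi-row-group case, and it assumes that `out.count()` reports the number of bytes already written and that the block is not empty.

```
using namespace DB;
using namespace DB::Parquet;

/// Hypothetical helper, for illustration only: one Block -> one row group -> one file.
void writeSingleRowGroupFile(const Block & block, const WriteOptions & options, WriteBuffer & out)
{
    SchemaElements schema = convertSchema(block, options);        // schema for the file footer

    writeFileHeader(out);                                         // (1) "PAR1" magic bytes

    ColumnChunkWriteStates columns;                                // (3)-(4) flatten to primitive columns
    for (const auto & col : block)
        prepareColumnForWrite(col.column, col.type, col.name, options, &columns);

    std::vector<parquet::format::ColumnChunk> chunk_metadata;
    for (auto & s : columns)                                       // (5)
    {
        size_t offset_in_file = out.count();                       // where this chunk's pages start
        writeColumnChunkBody(s, options, out);                     // (6) encode and write the pages
        chunk_metadata.push_back(                                  // (7) footer needs absolute offsets
            finalizeColumnChunkAndWriteFooter(offset_in_file, std::move(s), options, out));
    }

    parquet::format::RowGroup row_group = makeRowGroup(std::move(chunk_metadata), block.rows()); // (8)
    writeFileFooter({row_group}, std::move(schema), options, out);                               // (9)
}
```

In the actual output format the data is split into multiple row groups, and steps (4) and (6) can run on a thread pool, as noted above.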
+SchemaElements convertSchema(const Block & sample, const WriteOptions & options); + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema = nullptr); + +void writeFileHeader(WriteBuffer & out); + +/// Encodes a column chunk, without the footer. +/// The ColumnChunkWriteState-s should then passed to finalizeColumnChunkAndWriteFooter(). +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out); + +/// Unlike most of the column chunk data, the footer (`ColumnMetaData`) needs to know its absolute +/// offset in the file. So we encode it separately, after all previous row groups and column chunks +/// have been encoded. +/// (If you're wondering if the 8-byte offset values can be patched inside the encoded blob - no, +/// they're varint-encoded and can't be padded to a fixed length.) +/// `offset_in_file` is the absolute position in the file where the writeColumnChunkBody()'s output +/// starts. +/// Returns a ColumnChunk to add to the RowGroup. +parquet::format::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions & options, WriteBuffer & out); + +parquet::format::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows); + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3dde8ad6a6c..8f8c909d042 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -14,11 +14,15 @@ #include #include #include +#include #include "ArrowBufferedStreams.h" #include "ArrowColumnToCHColumn.h" #include "ArrowFieldIndexUtil.h" #include #include +#include +#include +#include namespace CurrentMetrics { @@ -33,6 +37,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int CANNOT_READ_ALL_DATA; + extern const int CANNOT_PARSE_NUMBER; } #define THROW_ARROW_NOT_OK(status) \ @@ -42,6 +47,322 @@ namespace ErrorCodes throw Exception::createDeprecated(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ } while (false) +/// Decode min/max value from column chunk statistics. +/// +/// There are two questionable decisions in this implementation: +/// * We parse the value from the encoded byte string instead of casting the parquet::Statistics +/// to parquet::TypedStatistics and taking the value from there. +/// * We dispatch based on the parquet logical+converted+physical type instead of the ClickHouse type. +/// The idea is that this is similar to what we'll have to do when reimplementing Parquet parsing in +/// ClickHouse instead of using Arrow (for speed). So, this is an exercise in parsing Parquet manually. 
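Before the actual decoder, a toy standalone illustration of the simplest case it handles, plain-encoded integers (the function name `decodeInt32StatisticToy` is invented for this sketch and does not exist in the patch): an INT32 min/max statistic is stored as 4 little-endian bytes, and whether to sign-extend is decided by the column's logical/converted type rather than by the bytes themselves.

```
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>

/// Illustration only: decode a plain-encoded INT32 statistic into a 64-bit value.
/// Assumes a little-endian host, like the decoder below.
int64_t decodeInt32StatisticToy(const std::string & data, bool is_signed)
{
    if (data.size() != 4)
        throw std::runtime_error("unexpected size for INT32 statistic");

    uint32_t raw = 0;
    std::memcpy(&raw, data.data(), 4);

    /// Sign-extend for signed logical types (e.g. INT_32), zero-extend for unsigned ones (e.g. UINT_32).
    return is_signed ? static_cast<int64_t>(static_cast<int32_t>(raw))
                     : static_cast<int64_t>(raw);
}
```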
+static std::optional decodePlainParquetValueSlow(const std::string & data, parquet::Type::type physical_type, const parquet::ColumnDescriptor & descr) +{ + using namespace parquet; + + auto decode_integer = [&](bool signed_) -> UInt64 { + size_t size; + switch (physical_type) + { + case parquet::Type::type::BOOLEAN: size = 1; break; + case parquet::Type::type::INT32: size = 4; break; + case parquet::Type::type::INT64: size = 8; break; + default: throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected physical type for number"); + } + if (data.size() != size) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected size: {}", data.size()); + + UInt64 val = 0; + memcpy(&val, data.data(), size); + + /// Sign-extend. + if (signed_ && size < 8 && (val >> (size * 8 - 1)) != 0) + val |= 0 - (1ul << (size * 8)); + + return val; + }; + + /// Decimal. + do // while (false) + { + Int32 scale; + if (descr.logical_type() && descr.logical_type()->is_decimal()) + scale = assert_cast(*descr.logical_type()).scale(); + else if (descr.converted_type() == ConvertedType::type::DECIMAL) + scale = descr.type_scale(); + else + break; + + size_t size; + bool big_endian = false; + switch (physical_type) + { + case Type::type::BOOLEAN: size = 1; break; + case Type::type::INT32: size = 4; break; + case Type::type::INT64: size = 8; break; + + case Type::type::FIXED_LEN_BYTE_ARRAY: + big_endian = true; + size = data.size(); + break; + default: throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected decimal physical type"); + } + /// Note that size is not necessarily a power of two. + /// E.g. spark turns 8-byte unsigned integers into 9-byte signed decimals. + if (data.size() != size || size < 1 || size > 32) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected decimal size: {} (actual {})", size, data.size()); + + /// For simplicity, widen all decimals to 256-bit. It should compare correctly with values + /// of different bitness. + Int256 val = 0; + memcpy(&val, data.data(), size); + if (big_endian) + std::reverse(reinterpret_cast(&val), reinterpret_cast(&val) + size); + /// Sign-extend. + if (size < 32 && (val >> (size * 8 - 1)) != 0) + val |= ~((Int256(1) << (size * 8)) - 1); + + return Field(DecimalField(Decimal256(val), static_cast(scale))); + } + while (false); + + /// Timestamp (decimal). + { + Int32 scale = -1; + bool is_timestamp = true; + if (descr.logical_type() && (descr.logical_type()->is_time() || descr.logical_type()->is_timestamp())) + { + LogicalType::TimeUnit::unit unit = descr.logical_type()->is_time() + ? assert_cast(*descr.logical_type()).time_unit() + : assert_cast(*descr.logical_type()).time_unit(); + switch (unit) + { + case LogicalType::TimeUnit::unit::MILLIS: scale = 3; break; + case LogicalType::TimeUnit::unit::MICROS: scale = 6; break; + case LogicalType::TimeUnit::unit::NANOS: scale = 9; break; + default: throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unknown time unit"); + } + } + else switch (descr.converted_type()) + { + case ConvertedType::type::TIME_MILLIS: scale = 3; break; + case ConvertedType::type::TIME_MICROS: scale = 6; break; + case ConvertedType::type::TIMESTAMP_MILLIS: scale = 3; break; + case ConvertedType::type::TIMESTAMP_MICROS: scale = 6; break; + default: is_timestamp = false; + } + + if (is_timestamp) + { + Int64 val = static_cast(decode_integer(/* signed */ true)); + return Field(DecimalField(Decimal64(val), scale)); + } + } + + /// Floats. 
+ + if (physical_type == Type::type::FLOAT) + { + if (data.size() != 4) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected float size"); + Float32 val; + memcpy(&val, data.data(), data.size()); + return Field(val); + } + + if (physical_type == Type::type::DOUBLE) + { + if (data.size() != 8) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected float size"); + Float64 val; + memcpy(&val, data.data(), data.size()); + return Field(val); + } + + /// Strings. + + if (physical_type == Type::type::BYTE_ARRAY || physical_type == Type::type::FIXED_LEN_BYTE_ARRAY) + { + /// Arrow's parquet decoder handles missing min/max values slightly incorrectly. + /// In a parquet file, min and max have separate is_set flags, i.e. one may be missing even + /// if the other is set. Arrow decoder ORs (!) these two flags together into one: HasMinMax(). + /// So, if exactly one of {min, max} is missing, Arrow reports it as empty string, with no + /// indication that it's actually missing. + /// + /// How can exactly one of {min, max} be missing? This happens if one of the two strings + /// exceeds the length limit for stats. Repro: + /// + /// insert into function file('t.parquet') select arrayStringConcat(range(number*1000000)) from numbers(2) settings output_format_parquet_use_custom_encoder=0 + /// select tupleElement(tupleElement(row_groups[1], 'columns')[1], 'statistics') from file('t.parquet', ParquetMetadata) + /// + /// Here the row group contains two strings: one empty, one very long. But the statistics + /// reported by arrow are indistinguishable from statistics if all strings were empty. + /// (Min and max are the last two tuple elements in the output of the second query. Notice + /// how they're empty strings instead of NULLs.) + /// + /// So we have to be conservative and treat empty string as unknown. + /// This is unfortunate because it's probably common for string columns to have lots of empty + /// values, and filter pushdown would probably often be useful in that case. + /// + /// TODO: Remove this workaround either when we implement our own Parquet decoder that + /// doesn't have this bug, or if it's fixed in Arrow. + if (data.empty()) + return std::nullopt; + + return Field(data); + } + + /// This one's deprecated in Parquet. + if (physical_type == Type::type::INT96) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Parquet INT96 type is deprecated and not supported"); + + /// Integers. + + bool signed_ = true; + if (descr.logical_type() && descr.logical_type()->is_int()) + signed_ = assert_cast(*descr.logical_type()).is_signed(); + else + signed_ = descr.converted_type() != ConvertedType::type::UINT_8 && + descr.converted_type() != ConvertedType::type::UINT_16 && + descr.converted_type() != ConvertedType::type::UINT_32 && + descr.converted_type() != ConvertedType::type::UINT_64; + + UInt64 val = decode_integer(signed_); + Field field = signed_ ? Field(static_cast(val)) : Field(val); + return field; +} + +/// Range of values for each column, based on statistics in the Parquet metadata. +/// This is lower/upper bounds, not necessarily exact min and max, e.g. the min/max can be just +/// missing in the metadata. 
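As a toy model of how these per-column bounds get used (the real check goes through KeyCondition::checkInHyperrectangle in initializeIfNeeded() further down; `ToyRange` and `canBeTrue` here are invented for illustration): a row group can be skipped when the interval implied by its statistics cannot intersect the interval required by the WHERE condition.

```
#include <algorithm>
#include <cstdio>

/// Illustration only: closed intervals standing in for DB::Range.
struct ToyRange { double left; double right; };

/// The condition "can be true" in this row group iff the two intervals overlap.
bool canBeTrue(ToyRange stats, ToyRange condition)
{
    return std::max(stats.left, condition.left) <= std::min(stats.right, condition.right);
}

int main()
{
    ToyRange column_stats{1, 50};        // min/max taken from the column chunk statistics
    ToyRange where_clause{100, 1e308};   // roughly "WHERE x >= 100"
    std::printf("%s\n", canBeTrue(column_stats, where_clause) ? "read row group" : "skip row group");
}
```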
+static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaData & file, int row_group_idx, const Block & header, const FormatSettings & format_settings) +{ + auto column_name_for_lookup = [&](std::string column_name) -> std::string + { + if (format_settings.parquet.case_insensitive_column_matching) + boost::to_lower(column_name); + return column_name; + }; + + std::unique_ptr row_group = file.RowGroup(row_group_idx); + + std::unordered_map> name_to_statistics; + for (int i = 0; i < row_group->num_columns(); ++i) + { + auto c = row_group->ColumnChunk(i); + auto s = c->statistics(); + if (!s) + continue; + + auto path = c->path_in_schema()->ToDotVector(); + if (path.size() != 1) + continue; // compound types not supported + + name_to_statistics.emplace(column_name_for_lookup(path[0]), s); + } + + /// +-----+ + /// / /| + /// +-----+ | + /// | | + + /// | |/ + /// +-----+ + std::vector hyperrectangle(header.columns(), Range::createWholeUniverse()); + + for (size_t idx = 0; idx < header.columns(); ++idx) + { + const std::string & name = header.getByPosition(idx).name; + auto it = name_to_statistics.find(column_name_for_lookup(name)); + if (it == name_to_statistics.end()) + continue; + auto stats = it->second; + + auto default_value = [&]() -> Field + { + DataTypePtr type = header.getByPosition(idx).type; + if (type->lowCardinality()) + type = assert_cast(*type).getDictionaryType(); + if (type->isNullable()) + type = assert_cast(*type).getNestedType(); + return type->getDefault(); + }; + + /// Only primitive fields are supported, not arrays, maps, tuples, or Nested. + /// Arrays, maps, and Nested can't be meaningfully supported because Parquet only has min/max + /// across all *elements* of the array, not min/max array itself. + /// Same limitation for tuples, but maybe it would make sense to have some kind of tuple + /// expansion in KeyCondition to accept ranges per element instead of whole tuple. + + std::optional min; + std::optional max; + if (stats->HasMinMax()) + { + try + { + min = decodePlainParquetValueSlow(stats->EncodeMin(), stats->physical_type(), *stats->descr()); + max = decodePlainParquetValueSlow(stats->EncodeMax(), stats->physical_type(), *stats->descr()); + } + catch (Exception & e) + { + e.addMessage(" (When parsing Parquet statistics for column {}, physical type {}, {}. Please report an issue and use input_format_parquet_filter_push_down = false to work around.)", name, static_cast(stats->physical_type()), stats->descr()->ToString()); + throw; + } + } + + /// In Range, NULL is represented as positive or negative infinity (represented by a special + /// kind of Field, different from floating-point infinities). + + bool always_null = stats->descr()->max_definition_level() != 0 && + stats->HasNullCount() && stats->num_values() == 0; + bool can_be_null = stats->descr()->max_definition_level() != 0 && + (!stats->HasNullCount() || stats->null_count() != 0); + bool null_as_default = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(header.getByPosition(idx).type); + + if (always_null) + { + /// Single-point range containing either the default value of one of the infinities. + if (null_as_default) + hyperrectangle[idx].right = hyperrectangle[idx].left = default_value(); + else + hyperrectangle[idx].right = hyperrectangle[idx].left; + continue; + } + + if (can_be_null) + { + if (null_as_default) + { + /// Make sure the range contains the default value. 
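+                /// (With null_as_default, NULLs are read back as the type's default value, e.g. 0
+                /// for numbers, so the default must lie inside the range; otherwise KeyCondition
+                /// could prune a row group whose only matching rows are NULLs.)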
+ Field def = default_value(); + if (min.has_value() && applyVisitor(FieldVisitorAccurateLess(), def, *min)) + min = def; + if (max.has_value() && applyVisitor(FieldVisitorAccurateLess(), *max, def)) + max = def; + } + else + { + /// Make sure the range reaches infinity on at least one side. + if (min.has_value() && max.has_value()) + min.reset(); + } + } + else + { + /// If the column doesn't have nulls, exclude both infinities. + if (!min.has_value()) + hyperrectangle[idx].left_included = false; + if (!max.has_value()) + hyperrectangle[idx].right_included = false; + } + + if (min.has_value()) + hyperrectangle[idx].left = std::move(min.value()); + if (max.has_value()) + hyperrectangle[idx].right = std::move(max.value()); + } + + return hyperrectangle; +} + ParquetBlockInputFormat::ParquetBlockInputFormat( ReadBuffer & buf, const Block & header_, @@ -59,7 +380,22 @@ ParquetBlockInputFormat::ParquetBlockInputFormat( pool = std::make_unique(CurrentMetrics::ParquetDecoderThreads, CurrentMetrics::ParquetDecoderThreadsActive, max_decoding_threads); } -ParquetBlockInputFormat::~ParquetBlockInputFormat() = default; +ParquetBlockInputFormat::~ParquetBlockInputFormat() +{ + is_stopped = true; + if (pool) + pool->wait(); +} + +void ParquetBlockInputFormat::setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context) +{ + /// When analyzer is enabled, query_info.filter_asts is missing sets and maybe some type casts, + /// so don't use it. I'm not sure how to support analyzer here: https://github.com/ClickHouse/ClickHouse/issues/53536 + if (format_settings.parquet.filter_push_down && !context->getSettingsRef().allow_experimental_analyzer) + key_condition.emplace(query_info, context, getPort().getHeader().getNames(), + std::make_shared(std::make_shared( + getPort().getHeader().getColumnsWithTypeAndName()))); +} void ParquetBlockInputFormat::initializeIfNeeded() { @@ -79,17 +415,37 @@ void ParquetBlockInputFormat::initializeIfNeeded() std::shared_ptr schema; THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); - row_groups.resize(metadata->num_row_groups()); - ArrowFieldIndexUtil field_util( format_settings.parquet.case_insensitive_column_matching, format_settings.parquet.allow_missing_columns); column_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema); + + int num_row_groups = metadata->num_row_groups(); + row_group_batches.reserve(num_row_groups); + + for (int row_group = 0; row_group < num_row_groups; ++row_group) + { + if (skip_row_groups.contains(row_group)) + continue; + + if (key_condition.has_value() && + !key_condition->checkInHyperrectangle( + getHyperrectangleForRowGroup(*metadata, row_group, getPort().getHeader(), format_settings), + getPort().getHeader().getDataTypes()).can_be_true) + continue; + + if (row_group_batches.empty() || row_group_batches.back().total_bytes_compressed >= min_bytes_for_seek) + row_group_batches.emplace_back(); + + row_group_batches.back().row_groups_idxs.push_back(row_group); + row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows(); + row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size(); + } } -void ParquetBlockInputFormat::initializeRowGroupReader(size_t row_group_idx) +void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_batch_idx) { - auto & row_group = row_groups[row_group_idx]; + auto & row_group_batch = row_group_batches[row_group_batch_idx]; parquet::ArrowReaderProperties properties; 
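+    /// One Arrow FileReader per row group batch: a single RecordBatchReader below streams all row
+    /// groups of the batch, and since FileReader is not thread safe each decoding thread builds its own.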
properties.set_use_threads(false); @@ -135,34 +491,30 @@ void ParquetBlockInputFormat::initializeRowGroupReader(size_t row_group_idx) builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); builder.properties(properties); // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. - THROW_ARROW_NOT_OK(builder.Build(&row_group.file_reader)); + THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); THROW_ARROW_NOT_OK( - row_group.file_reader->GetRecordBatchReader({static_cast(row_group_idx)}, column_indices, &row_group.record_batch_reader)); + row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); - row_group.arrow_column_to_ch_column = std::make_unique( + row_group_batch.arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.null_as_default, format_settings.parquet.case_insensitive_column_matching); - - row_group.row_group_bytes_uncompressed = metadata->RowGroup(static_cast(row_group_idx))->total_compressed_size(); - row_group.row_group_rows = metadata->RowGroup(static_cast(row_group_idx))->num_rows(); } -void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_idx) +void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_batch_idx) { chassert(!mutex.try_lock()); - auto & status = row_groups[row_group_idx].status; - chassert(status == RowGroupState::Status::NotStarted || status == RowGroupState::Status::Paused); + auto & status = row_group_batches[row_group_batch_idx].status; + chassert(status == RowGroupBatchState::Status::NotStarted || status == RowGroupBatchState::Status::Paused); - status = RowGroupState::Status::Running; + status = RowGroupBatchState::Status::Running; pool->scheduleOrThrowOnError( - [this, row_group_idx, thread_group = CurrentThread::getGroup()]() + [this, row_group_batch_idx, thread_group = CurrentThread::getGroup()]() { if (thread_group) CurrentThread::attachToGroupIfDetached(thread_group); @@ -172,7 +524,7 @@ void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_idx) { setThreadName("ParquetDecoder"); - threadFunction(row_group_idx); + threadFunction(row_group_batch_idx); } catch (...) 
{ @@ -183,44 +535,44 @@ void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_idx) }); } -void ParquetBlockInputFormat::threadFunction(size_t row_group_idx) +void ParquetBlockInputFormat::threadFunction(size_t row_group_batch_idx) { std::unique_lock lock(mutex); - auto & row_group = row_groups[row_group_idx]; - chassert(row_group.status == RowGroupState::Status::Running); + auto & row_group_batch = row_group_batches[row_group_batch_idx]; + chassert(row_group_batch.status == RowGroupBatchState::Status::Running); while (true) { - if (is_stopped || row_group.num_pending_chunks >= max_pending_chunks_per_row_group) + if (is_stopped || row_group_batch.num_pending_chunks >= max_pending_chunks_per_row_group_batch) { - row_group.status = RowGroupState::Status::Paused; + row_group_batch.status = RowGroupBatchState::Status::Paused; return; } - decodeOneChunk(row_group_idx, lock); + decodeOneChunk(row_group_batch_idx, lock); - if (row_group.status == RowGroupState::Status::Done) + if (row_group_batch.status == RowGroupBatchState::Status::Done) return; } } -void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_idx, std::unique_lock & lock) +void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::unique_lock & lock) { - auto & row_group = row_groups[row_group_idx]; - chassert(row_group.status != RowGroupState::Status::Done); + auto & row_group_batch = row_group_batches[row_group_batch_idx]; + chassert(row_group_batch.status != RowGroupBatchState::Status::Done); chassert(lock.owns_lock()); SCOPE_EXIT({ chassert(lock.owns_lock() || std::uncaught_exceptions()); }); lock.unlock(); auto end_of_row_group = [&] { - row_group.arrow_column_to_ch_column.reset(); - row_group.record_batch_reader.reset(); - row_group.file_reader.reset(); + row_group_batch.arrow_column_to_ch_column.reset(); + row_group_batch.record_batch_reader.reset(); + row_group_batch.file_reader.reset(); lock.lock(); - row_group.status = RowGroupState::Status::Done; + row_group_batch.status = RowGroupBatchState::Status::Done; // We may be able to schedule more work now, but can't call scheduleMoreWorkIfNeeded() right // here because we're running on the same thread pool, so it'll deadlock if thread limit is @@ -228,23 +580,10 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_idx, std::unique_l condvar.notify_all(); }; - if (!row_group.record_batch_reader) - { - if (skip_row_groups.contains(static_cast(row_group_idx))) - { - // Pretend that the row group is empty. - // (We could avoid scheduling the row group on a thread in the first place. But the - // skip_row_groups feature is mostly unused, so it's better to be a little inefficient - // than to add a bunch of extra mostly-dead code for this.) 
- end_of_row_group(); - return; - } + if (!row_group_batch.record_batch_reader) + initializeRowGroupBatchReader(row_group_batch_idx); - initializeRowGroupReader(row_group_idx); - } - - - auto batch = row_group.record_batch_reader->Next(); + auto batch = row_group_batch.record_batch_reader->Next(); if (!batch.ok()) throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); @@ -256,44 +595,50 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_idx, std::unique_l auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group.row_group_bytes_uncompressed) / row_group.row_group_rows * (*tmp_table)->num_rows())); - PendingChunk res = {.chunk_idx = row_group.next_chunk_idx, .row_group_idx = row_group_idx, .approx_original_chunk_size = approx_chunk_original_size}; + size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * (*tmp_table)->num_rows())); + PendingChunk res = { + .chunk = {}, + .block_missing_values = {}, + .chunk_idx = row_group_batch.next_chunk_idx, + .row_group_batch_idx = row_group_batch_idx, + .approx_original_chunk_size = approx_chunk_original_size + }; /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr; - row_group.arrow_column_to_ch_column->arrowTableToCHChunk(res.chunk, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(res.chunk, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); lock.lock(); - ++row_group.next_chunk_idx; - ++row_group.num_pending_chunks; + ++row_group_batch.next_chunk_idx; + ++row_group_batch.num_pending_chunks; pending_chunks.push(std::move(res)); condvar.notify_all(); } -void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row_group_touched) +void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row_group_batch_touched) { - while (row_groups_completed < row_groups.size()) + while (row_group_batches_completed < row_group_batches.size()) { - auto & row_group = row_groups[row_groups_completed]; - if (row_group.status != RowGroupState::Status::Done || row_group.num_pending_chunks != 0) + auto & row_group = row_group_batches[row_group_batches_completed]; + if (row_group.status != RowGroupBatchState::Status::Done || row_group.num_pending_chunks != 0) break; - ++row_groups_completed; + ++row_group_batches_completed; } if (pool) { - while (row_groups_started - row_groups_completed < max_decoding_threads && - row_groups_started < row_groups.size()) - scheduleRowGroup(row_groups_started++); + while (row_group_batches_started - row_group_batches_completed < max_decoding_threads && + row_group_batches_started < row_group_batches.size()) + scheduleRowGroup(row_group_batches_started++); - if (row_group_touched) + if (row_group_batch_touched) { - auto & row_group = row_groups[*row_group_touched]; - if (row_group.status == RowGroupState::Status::Paused && - row_group.num_pending_chunks < max_pending_chunks_per_row_group) - scheduleRowGroup(*row_group_touched); + auto & row_group = row_group_batches[*row_group_batch_touched]; + if (row_group.status == 
RowGroupBatchState::Status::Paused && + row_group.num_pending_chunks < max_pending_chunks_per_row_group_batch) + scheduleRowGroup(*row_group_batch_touched); } } } @@ -302,6 +647,12 @@ Chunk ParquetBlockInputFormat::generate() { initializeIfNeeded(); + if (is_stopped || row_group_batches_completed == row_group_batches.size()) + return {}; + + if (need_only_count) + return getChunkForCount(row_group_batches[row_group_batches_completed++].total_rows); + std::unique_lock lock(mutex); while (true) @@ -318,30 +669,30 @@ Chunk ParquetBlockInputFormat::generate() if (!pending_chunks.empty() && (!format_settings.parquet.preserve_order || - pending_chunks.top().row_group_idx == row_groups_completed)) + pending_chunks.top().row_group_batch_idx == row_group_batches_completed)) { PendingChunk chunk = std::move(const_cast(pending_chunks.top())); pending_chunks.pop(); - auto & row_group = row_groups[chunk.row_group_idx]; + auto & row_group = row_group_batches[chunk.row_group_batch_idx]; chassert(row_group.num_pending_chunks != 0); chassert(chunk.chunk_idx == row_group.next_chunk_idx - row_group.num_pending_chunks); --row_group.num_pending_chunks; - scheduleMoreWorkIfNeeded(chunk.row_group_idx); + scheduleMoreWorkIfNeeded(chunk.row_group_batch_idx); previous_block_missing_values = std::move(chunk.block_missing_values); previous_approx_bytes_read_for_chunk = chunk.approx_original_chunk_size; return std::move(chunk.chunk); } - if (row_groups_completed == row_groups.size()) + if (row_group_batches_completed == row_group_batches.size()) return {}; if (pool) condvar.wait(lock); else - decodeOneChunk(row_groups_completed, lock); + decodeOneChunk(row_group_batches_completed, lock); } } @@ -354,12 +705,12 @@ void ParquetBlockInputFormat::resetParser() arrow_file.reset(); metadata.reset(); column_indices.clear(); - row_groups.clear(); + row_group_batches.clear(); while (!pending_chunks.empty()) pending_chunks.pop(); - row_groups_completed = 0; + row_group_batches_completed = 0; previous_block_missing_values.clear(); - row_groups_started = 0; + row_group_batches_started = 0; background_exception = nullptr; is_stopped = false; @@ -378,12 +729,19 @@ ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings { } +void ParquetSchemaReader::initializeIfNeeded() +{ + if (arrow_file) + return; + + std::atomic is_stopped{0}; + arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true); + metadata = parquet::ReadMetaData(arrow_file); +} + NamesAndTypesList ParquetSchemaReader::readSchema() { - std::atomic is_stopped{0}; - auto file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true); - - auto metadata = parquet::ReadMetaData(file); + initializeIfNeeded(); std::shared_ptr schema; THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); @@ -395,6 +753,12 @@ NamesAndTypesList ParquetSchemaReader::readSchema() return header.getNamesAndTypesList(); } +std::optional ParquetSchemaReader::readNumberOrRows() +{ + initializeIfNeeded(); + return metadata->num_rows(); +} + void registerInputFormatParquet(FormatFactory & factory) { factory.registerRandomAccessInputFormat( @@ -407,7 +771,7 @@ void registerInputFormatParquet(FormatFactory & factory) size_t /* max_download_threads */, size_t max_parsing_threads) { - size_t min_bytes_for_seek = is_remote_fs ? read_settings.remote_read_min_bytes_for_seek : 8 * 1024; + size_t min_bytes_for_seek = is_remote_fs ? 
read_settings.remote_read_min_bytes_for_seek : settings.parquet.local_read_min_bytes_for_seek; return std::make_shared( buf, sample, @@ -415,7 +779,6 @@ void registerInputFormatParquet(FormatFactory & factory) max_parsing_threads, min_bytes_for_seek); }); - factory.markFormatSupportsSubcolumns("Parquet"); factory.markFormatSupportsSubsetOfColumns("Parquet"); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a14c51f8b9f..c102dbee0f4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace parquet { class FileMetaData; } namespace parquet::arrow { class FileReader; } @@ -52,8 +53,11 @@ public: const FormatSettings & format_settings, size_t max_decoding_threads, size_t min_bytes_for_seek); + ~ParquetBlockInputFormat() override; + void setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context) override; + void resetParser() override; String getName() const override { return "ParquetBlockInputFormat"; } @@ -71,14 +75,14 @@ private: } void initializeIfNeeded(); - void initializeRowGroupReader(size_t row_group_idx); + void initializeRowGroupBatchReader(size_t row_group_batch_idx); - void decodeOneChunk(size_t row_group_idx, std::unique_lock & lock); + void decodeOneChunk(size_t row_group_batch_idx, std::unique_lock & lock); - void scheduleMoreWorkIfNeeded(std::optional row_group_touched = std::nullopt); - void scheduleRowGroup(size_t row_group_idx); + void scheduleMoreWorkIfNeeded(std::optional row_group_batch_touched = std::nullopt); + void scheduleRowGroup(size_t row_group_batch_idx); - void threadFunction(size_t row_group_idx); + void threadFunction(size_t row_group_batch_idx); // Data layout in the file: // @@ -165,7 +169,7 @@ private: // * The max_pending_chunks_per_row_group limit could be based on actual memory usage too. // Useful for preserve_order. - struct RowGroupState + struct RowGroupBatchState { // Transitions: // @@ -202,8 +206,10 @@ private: size_t next_chunk_idx = 0; size_t num_pending_chunks = 0; - size_t row_group_bytes_uncompressed = 0; - size_t row_group_rows = 0; + size_t total_rows = 0; + size_t total_bytes_compressed = 0; + + std::vector row_groups_idxs; // These are only used by the decoding thread, so don't require locking the mutex. std::unique_ptr file_reader; @@ -217,7 +223,7 @@ private: Chunk chunk; BlockMissingValues block_missing_values; size_t chunk_idx; // within row group - size_t row_group_idx; + size_t row_group_batch_idx; size_t approx_original_chunk_size; // For priority_queue. @@ -230,8 +236,8 @@ private: bool operator()(const PendingChunk & a, const PendingChunk & b) const { auto tuplificate = [this](const PendingChunk & c) - { return row_group_first ? std::tie(c.row_group_idx, c.chunk_idx) - : std::tie(c.chunk_idx, c.row_group_idx); }; + { return row_group_first ? std::tie(c.row_group_batch_idx, c.chunk_idx) + : std::tie(c.chunk_idx, c.row_group_batch_idx); }; return tuplificate(a) > tuplificate(b); } }; @@ -241,14 +247,17 @@ private: const std::unordered_set & skip_row_groups; size_t max_decoding_threads; size_t min_bytes_for_seek; - const size_t max_pending_chunks_per_row_group = 2; + const size_t max_pending_chunks_per_row_group_batch = 2; - // RandomAccessFile is thread safe, so we share it among threads. - // FileReader is not, so each thread creates its own. + /// RandomAccessFile is thread safe, so we share it among threads. 
+ /// FileReader is not, so each thread creates its own. std::shared_ptr arrow_file; std::shared_ptr metadata; - // indices of columns to read from Parquet file + /// Indices of columns to read from Parquet file. std::vector column_indices; + /// Pushed-down filter that we'll use to skip row groups. + std::optional key_condition; + // Window of active row groups: // @@ -264,16 +273,16 @@ private: // Wakes up the generate() call, if any. std::condition_variable condvar; - std::vector row_groups; + std::vector row_group_batches; std::priority_queue, PendingChunk::Compare> pending_chunks; - size_t row_groups_completed = 0; + size_t row_group_batches_completed = 0; // These are only used when max_decoding_threads > 1. - size_t row_groups_started = 0; + size_t row_group_batches_started = 0; std::unique_ptr pool; BlockMissingValues previous_block_missing_values; - size_t previous_approx_bytes_read_for_chunk; + size_t previous_approx_bytes_read_for_chunk = 0; std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; @@ -286,9 +295,14 @@ public: ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); NamesAndTypesList readSchema() override; + std::optional readNumberOrRows() override; private: + void initializeIfNeeded(); + const FormatSettings format_settings; + std::shared_ptr arrow_file; + std::shared_ptr metadata; }; } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 91840cd2c50..fbf8b3a7c87 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -3,14 +3,23 @@ #if USE_PARQUET #include +#include #include #include "ArrowBufferedStreams.h" #include "CHColumnToArrowColumn.h" +namespace CurrentMetrics +{ + extern const Metric ParquetEncoderThreads; + extern const Metric ParquetEncoderThreadsActive; +} + namespace DB { +using namespace Parquet; + namespace ErrorCodes { extern const int UNKNOWN_EXCEPTION; @@ -59,19 +68,229 @@ namespace if (method == FormatSettings::ParquetCompression::GZIP) return parquet::Compression::type::GZIP; - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported parquet compression method"); } - } ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { + if (format_settings.parquet.use_custom_encoder) + { + if (format_settings.parquet.parallel_encoding && format_settings.max_threads > 1) + pool = std::make_unique( + CurrentMetrics::ParquetEncoderThreads, CurrentMetrics::ParquetEncoderThreadsActive, + format_settings.max_threads); + + using C = FormatSettings::ParquetCompression; + switch (format_settings.parquet.output_compression_method) + { + case C::NONE: options.compression = CompressionMethod::None; break; + case C::SNAPPY: options.compression = CompressionMethod::Snappy; break; + case C::ZSTD: options.compression = CompressionMethod::Zstd; break; + case C::LZ4: options.compression = CompressionMethod::Lz4; break; + case C::GZIP: options.compression = CompressionMethod::Gzip; break; + case C::BROTLI: options.compression = CompressionMethod::Brotli; break; + } + options.output_string_as_string = format_settings.parquet.output_string_as_string; + options.output_fixed_string_as_fixed_byte_array = 
format_settings.parquet.output_fixed_string_as_fixed_byte_array; + options.data_page_size = format_settings.parquet.data_page_size; + options.write_batch_size = format_settings.parquet.write_batch_size; + + schema = convertSchema(header_, options); + } } -void ParquetBlockOutputFormat::consumeStaged() +ParquetBlockOutputFormat::~ParquetBlockOutputFormat() { - const size_t columns_num = staging_chunks.at(0).getNumColumns(); + if (pool) + { + is_stopped = true; + pool->wait(); + } +} + +void ParquetBlockOutputFormat::consume(Chunk chunk) +{ + /// Poll background tasks. + if (pool) + { + std::unique_lock lock(mutex); + while (true) + { + /// If some row groups are ready to be written to the file, write them. + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + /// If there's too much work in flight, wait for some of it to complete. + if (row_groups.size() < 2) + break; + if (bytes_in_flight <= format_settings.parquet.row_group_bytes * 4 && + task_queue.size() <= format_settings.max_threads * 4) + break; + + condvar.wait(lock); + } + } + + /// Do something like SquashingTransform to produce big enough row groups. + /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. + /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more + /// convenient to do the squashing here. It's also parallelized here. + + if (chunk.getNumRows() != 0) + { + staging_rows += chunk.getNumRows(); + staging_bytes += chunk.bytes(); + staging_chunks.push_back(std::move(chunk)); + } + + const size_t target_rows = std::max(static_cast(1), format_settings.parquet.row_group_rows); + + if (staging_rows < target_rows && + staging_bytes < format_settings.parquet.row_group_bytes) + return; + + /// In the rare case that more than `row_group_rows` rows arrived in one chunk, split the + /// staging chunk into multiple row groups. + if (staging_rows >= target_rows * 2) + { + /// Increase row group size slightly (by < 2x) to avoid a small row group at the end. + size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); + size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up + + Chunk concatenated = std::move(staging_chunks[0]); + for (size_t i = 1; i < staging_chunks.size(); ++i) + concatenated.append(staging_chunks[i]); + staging_chunks.clear(); + + for (size_t offset = 0; offset < staging_rows; offset += row_group_size) + { + size_t count = std::min(row_group_size, staging_rows - offset); + MutableColumns columns = concatenated.cloneEmptyColumns(); + for (size_t i = 0; i < columns.size(); ++i) + columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); + + Chunks piece; + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + writeRowGroup(std::move(piece)); + } + } + else + { + writeRowGroup(std::move(staging_chunks)); + } + + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::finalizeImpl() +{ + if (!staging_chunks.empty()) + writeRowGroup(std::move(staging_chunks)); + + if (format_settings.parquet.use_custom_encoder) + { + if (pool) + { + std::unique_lock lock(mutex); + + /// Wait for background work to complete. 
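+            /// Each pass writes out any row groups whose encoding tasks have all finished; we leave
+            /// the loop only when every scheduled row group has been written (or on error/cancel).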
+ while (true) + { + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + if (row_groups.empty()) + break; + + condvar.wait(lock); + } + } + + if (row_groups_complete.empty()) + { + base_offset = out.count(); + writeFileHeader(out); + } + writeFileFooter(std::move(row_groups_complete), schema, options, out); + } + else + { + if (!file_writer) + { + Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + std::vector chunks; + chunks.push_back(Chunk(header.getColumns(), 0)); + writeRowGroup(std::move(chunks)); + } + + if (file_writer) + { + auto status = file_writer->Close(); + if (!status.ok()) + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); + } + } +} + +void ParquetBlockOutputFormat::resetFormatterImpl() +{ + if (pool) + { + is_stopped = true; + pool->wait(); + is_stopped = false; + } + + background_exception = nullptr; + threads_running = 0; + task_queue.clear(); + row_groups.clear(); + file_writer.reset(); + row_groups_complete.clear(); + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::onCancel() +{ + is_stopped = true; +} + +void ParquetBlockOutputFormat::writeRowGroup(std::vector chunks) +{ + if (pool) + writeRowGroupInParallel(std::move(chunks)); + else if (!format_settings.parquet.use_custom_encoder) + writeUsingArrow(std::move(chunks)); + else + { + Chunk concatenated = std::move(chunks[0]); + for (size_t i = 1; i < chunks.size(); ++i) + concatenated.append(chunks[i]); + chunks.clear(); + + writeRowGroupInOneThread(std::move(concatenated)); + } +} + +void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) +{ + const size_t columns_num = chunks.at(0).getNumColumns(); std::shared_ptr arrow_table; if (!ch_column_to_arrow_column) @@ -85,7 +304,7 @@ void ParquetBlockOutputFormat::consumeStaged() format_settings.parquet.output_fixed_string_as_fixed_byte_array); } - ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, staging_chunks, columns_num); + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunks, columns_num); if (!file_writer) { @@ -112,64 +331,234 @@ void ParquetBlockOutputFormat::consumeStaged() file_writer = std::move(result.ValueOrDie()); } - // TODO: calculate row_group_size depending on a number of rows and table size - - // allow slightly bigger than row_group_size to avoid a very small tail row group - auto status = file_writer->WriteTable(*arrow_table, std::max(format_settings.parquet.row_group_rows, staging_rows)); + auto status = file_writer->WriteTable(*arrow_table, INT64_MAX); if (!status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while writing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::consume(Chunk chunk) +void ParquetBlockOutputFormat::writeRowGroupInOneThread(Chunk chunk) { - /// Do something like SquashingTransform to produce big enough row groups. - /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. - /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more - /// convenient to do the squashing here. 
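+    /// So start at most one new thread per queued task while staying within max_threads overall;
+    /// e.g. with max_threads = 8, 3 threads running and 10 queued tasks, this tries to add 5 more.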
- staging_rows += chunk.getNumRows(); - staging_bytes += chunk.bytes(); - staging_chunks.push_back(std::move(chunk)); - chassert(staging_chunks.back().getNumColumns() == staging_chunks.front().getNumColumns()); - if (staging_rows < format_settings.parquet.row_group_rows && - staging_bytes < format_settings.parquet.row_group_bytes) - { + if (chunk.getNumRows() == 0) return; - } - else + + const Block & header = getPort(PortKind::Main).getHeader(); + Parquet::ColumnChunkWriteStates columns_to_write; + chassert(header.columns() == chunk.getNumColumns()); + for (size_t i = 0; i < header.columns(); ++i) + prepareColumnForWrite( + chunk.getColumns()[i], header.getByPosition(i).type, header.getByPosition(i).name, + options, &columns_to_write); + + if (row_groups_complete.empty()) { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; + base_offset = out.count(); + writeFileHeader(out); + } + + std::vector column_chunks; + for (auto & s : columns_to_write) + { + size_t offset = out.count() - base_offset; + writeColumnChunkBody(s, options, out); + auto c = finalizeColumnChunkAndWriteFooter(offset, std::move(s), options, out); + column_chunks.push_back(std::move(c)); + } + + auto r = makeRowGroup(std::move(column_chunks), chunk.getNumRows()); + row_groups_complete.push_back(std::move(r)); +} + +void ParquetBlockOutputFormat::writeRowGroupInParallel(std::vector chunks) +{ + std::unique_lock lock(mutex); + + const Block & header = getPort(PortKind::Main).getHeader(); + + RowGroupState & r = row_groups.emplace_back(); + r.column_chunks.resize(header.columns()); + r.tasks_in_flight = r.column_chunks.size(); + + std::vector columnses; + for (auto & chunk : chunks) + { + chassert(header.columns() == chunk.getNumColumns()); + r.num_rows += chunk.getNumRows(); + columnses.push_back(chunk.detachColumns()); + } + + for (size_t i = 0; i < header.columns(); ++i) + { + Task & t = task_queue.emplace_back(&r, i, this); + t.column_type = header.getByPosition(i).type; + t.column_name = header.getByPosition(i).name; + + /// Defer concatenating the columns to the threads. + size_t bytes = 0; + for (size_t j = 0; j < chunks.size(); ++j) + { + auto & col = columnses[j][i]; + bytes += col->allocatedBytes(); + t.column_pieces.push_back(std::move(col)); + } + t.mem.set(bytes); + } + + startMoreThreadsIfNeeded(lock); +} + +void ParquetBlockOutputFormat::reapCompletedRowGroups(std::unique_lock & lock) +{ + while (!row_groups.empty() && row_groups.front().tasks_in_flight == 0 && !is_stopped) + { + RowGroupState & r = row_groups.front(); + + /// Write to the file. + + lock.unlock(); + + if (row_groups_complete.empty()) + { + base_offset = out.count(); + writeFileHeader(out); + } + + std::vector metadata; + for (auto & cols : r.column_chunks) + { + for (ColumnChunk & col : cols) + { + size_t offset = out.count() - base_offset; + + out.write(col.serialized.data(), col.serialized.size()); + auto m = finalizeColumnChunkAndWriteFooter(offset, std::move(col.state), options, out); + + metadata.push_back(std::move(m)); + } + } + + row_groups_complete.push_back(makeRowGroup(std::move(metadata), r.num_rows)); + + lock.lock(); + + row_groups.pop_front(); } } -void ParquetBlockOutputFormat::finalizeImpl() +void ParquetBlockOutputFormat::startMoreThreadsIfNeeded(const std::unique_lock &) { - if (!file_writer && staging_chunks.empty()) + /// Speculate that all current are already working on tasks. 
+ size_t to_add = std::min(task_queue.size(), format_settings.max_threads - threads_running); + for (size_t i = 0; i < to_add; ++i) { - Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + auto job = [this, thread_group = CurrentThread::getGroup()]() + { + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached();); - consume(Chunk(header.getColumns(), 0)); // this will make staging_chunks non-empty + try + { + setThreadName("ParquetEncoder"); + + threadFunction(); + } + catch (...) + { + std::lock_guard lock(mutex); + background_exception = std::current_exception(); + condvar.notify_all(); + --threads_running; + } + }; + + if (threads_running == 0) + { + /// First thread. We need it to succeed; otherwise we may get stuck. + pool->scheduleOrThrowOnError(job); + ++threads_running; + } + else + { + /// More threads. This may be called from inside the thread pool, so avoid waiting; + /// otherwise it may deadlock. + if (!pool->trySchedule(job)) + break; + } } - - if (!staging_chunks.empty()) - { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; - } - - auto status = file_writer->Close(); - if (!status.ok()) - throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::resetFormatterImpl() +void ParquetBlockOutputFormat::threadFunction() { - file_writer.reset(); + std::unique_lock lock(mutex); + + while (true) + { + if (task_queue.empty() || is_stopped) + { + /// The check and the decrement need to be in the same critical section, to make sure + /// we never get stuck with tasks but no threads. + --threads_running; + return; + } + + auto task = std::move(task_queue.front()); + task_queue.pop_front(); + + if (task.column_type) + { + lock.unlock(); + + IColumn::MutablePtr concatenated = IColumn::mutate(std::move(task.column_pieces[0])); + for (size_t i = 1; i < task.column_pieces.size(); ++i) + { + auto & c = task.column_pieces[i]; + concatenated->insertRangeFrom(*c, 0, c->size()); + c.reset(); + } + task.column_pieces.clear(); + + std::vector subcolumns; + prepareColumnForWrite( + std::move(concatenated), task.column_type, task.column_name, options, &subcolumns); + + lock.lock(); + + for (size_t i = 0; i < subcolumns.size(); ++i) + { + task.row_group->column_chunks[task.column_idx].emplace_back(this); + task.row_group->tasks_in_flight += 1; + + auto & t = task_queue.emplace_back(task.row_group, task.column_idx, this); + t.subcolumn_idx = i; + t.state = std::move(subcolumns[i]); + t.mem.set(t.state.allocatedBytes()); + } + + startMoreThreadsIfNeeded(lock); + } + else + { + lock.unlock(); + + PODArray serialized; + { + WriteBufferFromVector buf(serialized); + writeColumnChunkBody(task.state, options, buf); + } + + lock.lock(); + + auto & c = task.row_group->column_chunks[task.column_idx][task.subcolumn_idx]; + c.state = std::move(task.state); + c.serialized = std::move(serialized); + c.mem.set(c.serialized.size() + c.state.allocatedBytes()); + } + + --task.row_group->tasks_in_flight; + + condvar.notify_all(); + } } void registerOutputFormatParquet(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index 482c778bc52..aededc39dc4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -2,8 +2,11 @@ 
#include "config.h" #if USE_PARQUET -# include -# include + +#include +#include +#include +#include namespace arrow { @@ -28,25 +31,129 @@ class ParquetBlockOutputFormat : public IOutputFormat { public: ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + ~ParquetBlockOutputFormat() override; String getName() const override { return "ParquetBlockOutputFormat"; } String getContentType() const override { return "application/octet-stream"; } private: - void consumeStaged(); + struct MemoryToken + { + ParquetBlockOutputFormat * parent; + size_t bytes = 0; + + explicit MemoryToken(ParquetBlockOutputFormat * p, size_t b = 0) : parent(p) + { + set(b); + } + + MemoryToken(MemoryToken && t) + : parent(std::exchange(t.parent, nullptr)), bytes(std::exchange(t.bytes, 0)) {} + + MemoryToken & operator=(MemoryToken && t) + { + parent = std::exchange(t.parent, nullptr); + bytes = std::exchange(t.bytes, 0); + return *this; + } + + ~MemoryToken() + { + set(0); + } + + void set(size_t new_size) + { + if (new_size == bytes) + return; + parent->bytes_in_flight += new_size - bytes; // overflow is fine + bytes = new_size; + } + }; + + struct ColumnChunk + { + Parquet::ColumnChunkWriteState state; + PODArray serialized; + + MemoryToken mem; + + ColumnChunk(ParquetBlockOutputFormat * p) : mem(p) {} + }; + + struct RowGroupState + { + size_t tasks_in_flight = 0; + std::vector> column_chunks; + size_t num_rows = 0; + }; + + struct Task + { + RowGroupState * row_group; + size_t column_idx; + size_t subcolumn_idx = 0; + + MemoryToken mem; + + /// If not null, we need to call prepareColumnForWrite(). + /// Otherwise we need to call writeColumnChunkBody(). + DataTypePtr column_type; + std::string column_name; + std::vector column_pieces; + + Parquet::ColumnChunkWriteState state; + + Task(RowGroupState * rg, size_t ci, ParquetBlockOutputFormat * p) + : row_group(rg), column_idx(ci), mem(p) {} + }; + void consume(Chunk) override; void finalizeImpl() override; void resetFormatterImpl() override; + void onCancel() override; + void writeRowGroup(std::vector chunks); + void writeUsingArrow(std::vector chunks); + void writeRowGroupInOneThread(Chunk chunk); + void writeRowGroupInParallel(std::vector chunks); + + void threadFunction(); + void startMoreThreadsIfNeeded(const std::unique_lock & lock); + + /// Called in single-threaded fashion. Writes to the file. + void reapCompletedRowGroups(std::unique_lock & lock); + + const FormatSettings format_settings; + + /// Chunks to squash together to form a row group. std::vector staging_chunks; size_t staging_rows = 0; size_t staging_bytes = 0; - const FormatSettings format_settings; - std::unique_ptr file_writer; std::unique_ptr ch_column_to_arrow_column; + + Parquet::WriteOptions options; + Parquet::SchemaElements schema; + std::vector row_groups_complete; + size_t base_offset = 0; + + + std::mutex mutex; + std::condition_variable condvar; // wakes up consume() + std::unique_ptr pool; + + std::atomic_bool is_stopped{false}; + std::exception_ptr background_exception = nullptr; + + /// Invariant: if there's at least one task then there's at least one thread. 
+ size_t threads_running = 0; + std::atomic bytes_in_flight{0}; + + std::deque task_queue; + std::deque row_groups; }; } diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp index 229a0630328..043e6d2260c 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp @@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate() else if (name == names[3]) { auto column = types[3]->createColumn(); - /// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). + /// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6"; assert_cast(*column).insertData(version.data(), version.size()); res.addColumn(std::move(column)); @@ -504,7 +504,6 @@ void registerInputFormatParquetMetadata(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSubcolumns("ParquetMetadata"); factory.markFormatSupportsSubsetOfColumns("ParquetMetadata"); } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 14648e68f94..6fa891297f6 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -134,7 +134,8 @@ void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { if (total_rows >= format_settings.pretty.max_rows) { - total_rows += chunk.getNumRows(); + if (port_kind != PortKind::PartialResult) + total_rows += chunk.getNumRows(); return; } if (mono_block) @@ -315,7 +316,8 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind } writeString(bottom_separator_s, out); - total_rows += num_rows; + if (port_kind != PortKind::PartialResult) + total_rows += num_rows; } @@ -388,6 +390,34 @@ void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk) write(std::move(chunk), PortKind::Extremes); } +void PrettyBlockOutputFormat::clearLastLines(size_t lines_number) +{ + /// http://en.wikipedia.org/wiki/ANSI_escape_code + #define MOVE_TO_PREV_LINE "\033[A" + #define CLEAR_TO_END_OF_LINE "\033[K" + + static const char * clear_prev_line = MOVE_TO_PREV_LINE \ + CLEAR_TO_END_OF_LINE; + + /// Move cursor to the beginning of line + writeCString("\r", out); + + for (size_t line = 0; line < lines_number; ++line) + { + writeCString(clear_prev_line, out); + } +} + +void PrettyBlockOutputFormat::consumePartialResult(Chunk chunk) +{ + if (prev_partial_block_rows > 0) + /// number of rows + header line + footer line + clearLastLines(prev_partial_block_rows + 2); + + prev_partial_block_rows = chunk.getNumRows(); + write(std::move(chunk), PortKind::PartialResult); +} + void PrettyBlockOutputFormat::writeMonoChunkIfNeeded() { diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index dfb23ac63f9..92466dce3ff 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -28,7 +28,12 @@ protected: void consumeTotals(Chunk) override; void consumeExtremes(Chunk) override; + void clearLastLines(size_t lines_number); + void consumePartialResult(Chunk) override; + size_t total_rows = 0; + size_t prev_partial_block_rows = 0; + size_t row_number_width = 7; // "10000. 
" const FormatSettings format_settings; @@ -55,6 +60,7 @@ protected: void resetFormatterImpl() override { total_rows = 0; + prev_partial_block_rows = 0; } private: diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 2ba9ec725e2..3a04d86b1ad 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -194,7 +194,8 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeBottom(max_widths); - total_rows += num_rows; + if (port_kind != PortKind::PartialResult) + total_rows += num_rows; } diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 6098923a195..62d70689ddf 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -14,7 +14,7 @@ ProtobufListInputFormat::ProtobufListInputFormat( ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) , reader(std::make_unique(in_)) @@ -22,7 +22,7 @@ ProtobufListInputFormat::ProtobufListInputFormat( header_.getNames(), header_.getDataTypes(), missing_column_indices, - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes), /* with_length_delimiter = */ true, /* with_envelope = */ true, flatten_google_wrappers_, @@ -57,6 +57,30 @@ bool ProtobufListInputFormat::readRow(MutableColumns & columns, RowReadExtension return true; } +size_t ProtobufListInputFormat::countRows(size_t max_block_size) +{ + if (getTotalRows() == 0) + reader->startMessage(true); + + if (reader->eof()) + { + reader->endMessage(false); + return 0; + } + + size_t num_rows = 0; + while (!reader->eof() && num_rows < max_block_size) + { + int tag; + reader->readFieldNumber(tag); + reader->startNestedMessage(); + reader->endNestedMessage(); + ++num_rows; + } + + return num_rows; +} + ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format_settings) : schema_info( format_settings.schema.format_schema, @@ -84,7 +108,7 @@ void registerInputFormatProtobufList(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "Protobuf", true), settings.protobuf.input_flatten_google_wrappers); + ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), settings.protobuf.input_flatten_google_wrappers); }); factory.markFormatSupportsSubsetOfColumns("ProtobufList"); factory.registerAdditionalInfoForSchemaCacheGetter( diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index ba2e8014878..4d0478087b6 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -28,7 +28,7 @@ public: ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool flatten_google_wrappers_); String getName() const override { return 
"ProtobufListInputFormat"; } @@ -38,6 +38,9 @@ public: private: bool readRow(MutableColumns & columns, RowReadExtension & row_read_extension) override; + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + std::unique_ptr reader; std::vector missing_column_indices; std::unique_ptr serializer; diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp index 815b9ebb61d..ae0b9db7357 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp @@ -2,7 +2,6 @@ #if USE_PROTOBUF # include -# include # include # include # include @@ -13,14 +12,14 @@ namespace DB ProtobufListOutputFormat::ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_) : IRowOutputFormat(header_, out_) , writer(std::make_unique(out)) , serializer(ProtobufSerializer::create( header_.getNames(), header_.getDataTypes(), - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes), /* with_length_delimiter = */ true, /* with_envelope = */ true, defaults_for_nullable_google_wrappers_, @@ -55,7 +54,7 @@ void registerOutputFormatProtobufList(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - buf, header, FormatSchemaInfo(settings, "Protobuf", true), + buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema), settings.protobuf.output_nullables_with_google_wrappers); }); } diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h index d85018c0351..e7765590d51 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h @@ -4,10 +4,10 @@ #if USE_PROTOBUF # include +# include namespace DB { -class FormatSchemaInfo; class ProtobufWriter; class ProtobufSerializer; @@ -26,7 +26,7 @@ public: ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_); String getName() const override { return "ProtobufListOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 126f3673571..3cba8004f23 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -11,9 +11,9 @@ namespace DB { ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) + const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) - , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No)) + , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No)) , 
with_length_delimiter(with_length_delimiter_) , flatten_google_wrappers(flatten_google_wrappers_) { @@ -78,6 +78,22 @@ void ProtobufRowInputFormat::resetParser() reader.reset(); } +size_t ProtobufRowInputFormat::countRows(size_t max_block_size) +{ + if (!reader) + createReaderAndSerializer(); + + size_t num_rows = 0; + while (!reader->eof() && num_rows < max_block_size) + { + reader->startMessage(with_length_delimiter); + reader->endMessage(false); + ++num_rows; + } + + return num_rows; +} + void registerInputFormatProtobuf(FormatFactory & factory) { for (bool with_length_delimiter : {false, true}) @@ -89,7 +105,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "Protobuf", true), + ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), with_length_delimiter, settings.protobuf.input_flatten_google_wrappers); }); diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 5c042f7c5ab..3f118227928 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -33,7 +33,7 @@ public: ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_); @@ -47,6 +47,9 @@ private: bool allowSyncAfterError() const override; void syncAfterError() override; + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + void createReaderAndSerializer(); std::unique_ptr reader; diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 129c9ca3156..7b4cc1bf0be 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -3,7 +3,6 @@ #if USE_PROTOBUF # include # include -# include # include # include # include @@ -20,7 +19,7 @@ namespace ErrorCodes ProtobufRowOutputFormat::ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_) : IRowOutputFormat(header_, out_) @@ -28,7 +27,7 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat( , serializer(ProtobufSerializer::create( header_.getNames(), header_.getDataTypes(), - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No), with_length_delimiter_, /* with_envelope = */ false, settings_.protobuf.output_nullables_with_google_wrappers, @@ -61,7 +60,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - buf, header, FormatSchemaInfo(settings, "Protobuf", true), + buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema), settings, with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h index f6ff5bae999..213e1c785fd 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h +++ 
b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h @@ -4,11 +4,11 @@ #if USE_PROTOBUF # include +# include namespace DB { class DB; -class FormatSchemaInfo; class ProtobufSerializer; class ProtobufWriter; class WriteBuffer; @@ -30,7 +30,7 @@ public: ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_); diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp index 8bc9bb5e2a3..6e8000af563 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp @@ -39,6 +39,15 @@ bool RawBLOBRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & return false; } +size_t RawBLOBRowInputFormat::countRows(size_t) +{ + if (done_count_rows) + return 0; + + done_count_rows = true; + return 1; +} + void registerInputFormatRawBLOB(FormatFactory & factory) { factory.registerInputFormat("RawBLOB", []( diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h index 6fc1f277015..1336da56179 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h +++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h @@ -22,6 +22,11 @@ public: private: bool readRow(MutableColumns & columns, RowReadExtension &) override; + + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + + bool done_count_rows = false; }; class RawBLOBSchemaReader: public IExternalSchemaReader diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index d902a8be6a7..8e94a568b1e 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -143,7 +143,7 @@ RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & { } -DataTypes RegexpSchemaReader::readRowAndGetDataTypes() +std::optional RegexpSchemaReader::readRowAndGetDataTypes() { if (buf.eof()) return {}; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 2469774aaf9..7417d48d8c1 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -79,7 +79,7 @@ public: RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index d3021110b46..f4f92583473 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -216,6 +216,18 @@ void TSKVRowInputFormat::resetParser() name_buf.clear(); } +size_t TSKVRowInputFormat::countRows(size_t max_block_size) +{ + size_t num_rows = 0; + while (!in->eof() && num_rows < max_block_size) + { + skipToUnescapedNextLineOrEOF(*in); + ++num_rows; + } + + return num_rows; +} + TSKVSchemaReader::TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowWithNamesSchemaReader(in_, format_settings_, 
getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped)) { diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/src/Processors/Formats/Impl/TSKVRowInputFormat.h index 5130ee5e827..6ed553fdc74 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.h +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.h @@ -36,6 +36,9 @@ private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + const FormatSettings format_settings; /// Buffer for the read from the stream the field name. Used when you have to copy it. diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 2239c8539e3..3205adc2a48 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -300,6 +300,56 @@ bool TabSeparatedFormatReader::checkForSuffix() return false; } +void TabSeparatedFormatReader::skipRow() +{ + ReadBuffer & istr = *buf; + while (!istr.eof()) + { + char * pos; + if (is_raw) + pos = find_first_symbols<'\r', '\n'>(istr.position(), istr.buffer().end()); + else + pos = find_first_symbols<'\\', '\r', '\n'>(istr.position(), istr.buffer().end()); + + istr.position() = pos; + + if (istr.position() > istr.buffer().end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); + else if (pos == istr.buffer().end()) + continue; + + if (!is_raw && *istr.position() == '\\') + { + ++istr.position(); + if (!istr.eof()) + ++istr.position(); + continue; + } + + if (*istr.position() == '\n') + { + ++istr.position(); + if (!istr.eof() && *istr.position() == '\r') + ++istr.position(); + return; + } + else if (*istr.position() == '\r') + { + ++istr.position(); + if (!istr.eof() && *istr.position() == '\n') + { + ++istr.position(); + return; + } + } + } +} + +bool TabSeparatedFormatReader::checkForEndOfRow() +{ + return buf->eof() || *buf->position() == '\n'; +} + TabSeparatedSchemaReader::TabSeparatedSchemaReader( ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( @@ -315,19 +365,22 @@ TabSeparatedSchemaReader::TabSeparatedSchemaReader( { } -std::pair, DataTypes> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() +std::optional, DataTypes>> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { if (buf.eof()) return {}; auto fields = reader.readRow(); auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); - return {fields, data_types}; + return std::make_pair(fields, data_types); } -DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypesImpl() +std::optional TabSeparatedSchemaReader::readRowAndGetDataTypesImpl() { - return readRowAndGetFieldsAndDataTypes().second; + auto fields_with_types = readRowAndGetFieldsAndDataTypes(); + if (!fields_with_types) + return {}; + return std::move(fields_with_types->second); } void registerInputFormatTabSeparated(FormatFactory & factory) @@ -411,11 +464,6 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer continue; } - ++number_of_rows; - if ((number_of_rows >= min_rows) - && ((memory.size() + static_cast(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows))) - need_more_data = false; - if (*pos == '\n') { 
++pos; @@ -427,7 +475,14 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer ++pos; if (loadAtPosition(in, memory, pos) && *pos == '\n') ++pos; + else + continue; } + + ++number_of_rows; + if ((number_of_rows >= min_rows) + && ((memory.size() + static_cast(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows))) + need_more_data = false; } saveUpToPosition(in, memory, pos); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 8df57675cf5..d84e8f3e0ac 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -33,6 +33,7 @@ private: void syncAfterError() override; bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } + bool supportsCountRows() const override { return true; } std::unique_ptr buf; }; @@ -59,6 +60,8 @@ public: std::vector readTypes() override { return readHeaderRow(); } std::vector readHeaderRow() { return readRowImpl(); } + void skipRow() override; + template String readFieldIntoString(); @@ -76,6 +79,9 @@ public: void setReadBuffer(ReadBuffer & in_) override; bool checkForSuffix() override; + bool checkForEndOfRow() override; + + bool allowVariableNumberOfColumns() const override { return format_settings.tsv.allow_variable_number_of_columns; } private: template @@ -92,8 +98,10 @@ public: TabSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypesImpl() override; - std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + bool allowVariableNumberOfColumns() const override { return format_settings.tsv.allow_variable_number_of_columns; } + + std::optional readRowAndGetDataTypesImpl() override; + std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() override; PeekableReadBuffer buf; TabSeparatedFormatReader reader; diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 8a09e800fa7..b065e00f5d1 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -490,7 +490,7 @@ TemplateSchemaReader::TemplateSchemaReader( setColumnNames(row_format.column_names); } -DataTypes TemplateSchemaReader::readRowAndGetDataTypes() +std::optional TemplateSchemaReader::readRowAndGetDataTypes() { if (first_row) format_reader.readPrefix(); diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 8f9088e2c47..2752cb13e50 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -119,7 +119,7 @@ public: std::string row_between_delimiter, const FormatSettings & format_settings_); - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; private: void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 3a65a6fe4ea..8126c472f70 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -54,66 +54,8 @@ ValuesBlockInputFormat::ValuesBlockInputFormat( { } -Chunk 
ValuesBlockInputFormat::generate() -{ - if (total_rows == 0) - readPrefix(); - - const Block & header = getPort().getHeader(); - MutableColumns columns = header.cloneEmptyColumns(); - block_missing_values.clear(); - size_t chunk_start = getDataOffsetMaybeCompressed(*buf); - - for (size_t rows_in_block = 0; rows_in_block < params.max_block_size; ++rows_in_block) - { - try - { - skipWhitespaceIfAny(*buf); - if (buf->eof() || *buf->position() == ';') - break; - readRow(columns, rows_in_block); - } - catch (Exception & e) - { - if (isParseError(e.code())) - e.addMessage(" at row " + std::to_string(total_rows)); - throw; - } - } - - approx_bytes_read_for_chunk = getDataOffsetMaybeCompressed(*buf) - chunk_start; - - /// Evaluate expressions, which were parsed using templates, if any - for (size_t i = 0; i < columns.size(); ++i) - { - if (!templates[i] || !templates[i]->rowsCount()) - continue; - - const auto & expected_type = header.getByPosition(i).type; - if (columns[i]->empty()) - columns[i] = IColumn::mutate(templates[i]->evaluateAll(block_missing_values, i, expected_type)); - else - { - ColumnPtr evaluated = templates[i]->evaluateAll(block_missing_values, i, expected_type, columns[i]->size()); - columns[i]->insertRangeFrom(*evaluated, 0, evaluated->size()); - } - } - - if (columns.empty() || columns[0]->empty()) - { - readSuffix(); - return {}; - } - - for (const auto & column : columns) - column->finalize(); - - size_t rows_in_block = columns[0]->size(); - return Chunk{std::move(columns), rows_in_block}; -} - /// Can be used in fileSegmentationEngine for parallel parsing of Values -static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int balance) +bool ValuesBlockInputFormat::skipToNextRow(ReadBuffer * buf, size_t min_chunk_bytes, int balance) { skipWhitespaceIfAny(*buf); if (buf->eof() || *buf->position() == ';') @@ -156,6 +98,80 @@ static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int return true; } +Chunk ValuesBlockInputFormat::generate() +{ + if (total_rows == 0) + readPrefix(); + + const Block & header = getPort().getHeader(); + MutableColumns columns = header.cloneEmptyColumns(); + block_missing_values.clear(); + size_t chunk_start = getDataOffsetMaybeCompressed(*buf); + + size_t rows_in_block = 0; + for (; rows_in_block < params.max_block_size; ++rows_in_block) + { + try + { + skipWhitespaceIfAny(*buf); + if (buf->eof() || *buf->position() == ';') + break; + if (need_only_count) + skipToNextRow(buf.get(), 1, 0); + else + readRow(columns, rows_in_block); + } + catch (Exception & e) + { + if (isParseError(e.code())) + e.addMessage(" at row " + std::to_string(total_rows)); + throw; + } + } + + approx_bytes_read_for_chunk = getDataOffsetMaybeCompressed(*buf) - chunk_start; + + if (need_only_count) + { + if (!rows_in_block) + { + readSuffix(); + return {}; + } + + total_rows += rows_in_block; + return getChunkForCount(rows_in_block); + } + + /// Evaluate expressions, which were parsed using templates, if any + for (size_t i = 0; i < columns.size(); ++i) + { + if (!templates[i] || !templates[i]->rowsCount()) + continue; + + const auto & expected_type = header.getByPosition(i).type; + if (columns[i]->empty()) + columns[i] = IColumn::mutate(templates[i]->evaluateAll(block_missing_values, i, expected_type)); + else + { + ColumnPtr evaluated = templates[i]->evaluateAll(block_missing_values, i, expected_type, columns[i]->size()); + columns[i]->insertRangeFrom(*evaluated, 0, evaluated->size()); + } + } + + if (columns.empty() || 
columns[0]->empty()) + { + readSuffix(); + return {}; + } + + for (const auto & column : columns) + column->finalize(); + + size_t rows = columns[0]->size(); + return Chunk{std::move(columns), rows}; +} + /// We need continuous memory containing the expression to use Lexer /// Note that this is both reading and tokenizing until the end of the row /// This is doing unnecessary work if the rest of the columns can be read with tryReadValue (which doesn't require tokens) @@ -638,7 +654,7 @@ ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & { } -DataTypes ValuesSchemaReader::readRowAndGetDataTypes() +std::optional ValuesSchemaReader::readRowAndGetDataTypes() { if (first_row) { diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index d540a24fa70..e8c3b555994 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -41,6 +41,9 @@ public: const BlockMissingValues & getMissingValues() const override { return block_missing_values; } size_t getApproxBytesReadForChunk() const override { return approx_bytes_read_for_chunk; } + + static bool skipToNextRow(ReadBuffer * buf, size_t min_chunk_bytes, int balance); + private: ValuesBlockInputFormat(std::unique_ptr buf_, const Block & header_, const RowInputFormatParams & params_, const FormatSettings & format_settings_); @@ -71,6 +74,8 @@ private: void readPrefix(); void readSuffix(); + size_t countRows(size_t max_block_size); + std::unique_ptr buf; std::optional token_iterator{}; std::optional tokens{}; @@ -96,7 +101,7 @@ private: Serializations serializations; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; class ValuesSchemaReader : public IRowSchemaReader @@ -105,7 +110,7 @@ public: ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; PeekableReadBuffer buf; ParserExpression parser; diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 9cf609ed2d7..bbcfdbb7193 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -14,8 +14,8 @@ class LazyOutputFormat : public IOutputFormat { public: - explicit LazyOutputFormat(const Block & header) - : IOutputFormat(header, out), queue(2) {} + explicit LazyOutputFormat(const Block & header, bool is_partial_result_protocol_active = false) + : IOutputFormat(header, out, is_partial_result_protocol_active), queue(2) {} String getName() const override { return "LazyOutputFormat"; } @@ -49,6 +49,7 @@ protected: void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); } + void consumePartialResult(Chunk chunk) override { consume(std::move(chunk)); } private: diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index fb49779e0af..a6514257dd3 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -58,8 +58,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( , is_binary(is_binary_) , with_names(with_names_) , with_types(with_types_) - , format_reader(std::move(format_reader_)) , 
try_detect_header(try_detect_header_) + , format_reader(std::move(format_reader_)) { column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } @@ -212,8 +212,24 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipRowStartDelimiter(); ext.read_columns.resize(data_types.size()); - for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) + size_t file_column = 0; + for (; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) { + if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) + { + while (file_column < column_mapping->column_indexes_for_input_fields.size()) + { + const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; + if (rem_column_index) + columns[*rem_column_index]->insertDefault(); + ++file_column; + } + break; + } + + if (file_column != 0) + format_reader->skipFieldDelimiter(); + const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); if (column_index) @@ -225,22 +241,6 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE column_mapping->names_of_columns[file_column]); else format_reader->skipField(file_column); - - if (!is_last_file_column) - { - if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) - { - ++file_column; - while (file_column < column_mapping->column_indexes_for_input_fields.size()) - { - const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; - columns[*rem_column_index]->insertDefault(); - ++file_column; - } - } - else - format_reader->skipFieldDelimiter(); - } } if (format_reader->allowVariableNumberOfColumns() && !format_reader->checkForEndOfRow()) @@ -248,7 +248,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE do { format_reader->skipFieldDelimiter(); - format_reader->skipField(1); + format_reader->skipField(file_column++); } while (!format_reader->checkForEndOfRow()); } @@ -264,6 +264,30 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE return true; } +size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) +{ + if (unlikely(end_of_stream)) + return 0; + + size_t num_rows = 0; + bool is_first_row = getTotalRows() == 0 && !with_names && !with_types && !is_header_detected; + while (!format_reader->checkForSuffix() && num_rows < max_block_size) + { + if (likely(!is_first_row)) + format_reader->skipRowBetweenDelimiter(); + else + is_first_row = false; + + format_reader->skipRow(); + ++num_rows; + } + + if (num_rows == 0 || num_rows < max_block_size) + end_of_stream = true; + + return num_rows; +} + void RowInputFormatWithNamesAndTypes::resetParser() { RowInputFormatWithDiagnosticInfo::resetParser(); @@ -419,12 +443,14 @@ namespace void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & column_names, std::vector & type_names) { - auto [first_row_values, first_row_types] = readRowAndGetFieldsAndDataTypes(); + auto first_row = readRowAndGetFieldsAndDataTypes(); /// No data. - if (first_row_values.empty()) + if (!first_row) return; + const auto & [first_row_values, first_row_types] = *first_row; + /// The first row contains non String elements, it cannot be a header. 
if (!checkIfAllTypesAreString(first_row_types)) { @@ -432,15 +458,17 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & return; } - auto [second_row_values, second_row_types] = readRowAndGetFieldsAndDataTypes(); + auto second_row = readRowAndGetFieldsAndDataTypes(); /// Data contains only 1 row, don't treat it as a header. - if (second_row_values.empty()) + if (!second_row) { buffered_types = first_row_types; return; } + const auto & [second_row_values, second_row_types] = *second_row; + DataTypes data_types; bool second_row_can_be_type_names = checkIfAllTypesAreString(second_row_types) && checkIfAllValuesAreTypeNames(readNamesFromFields(second_row_values)); size_t row = 2; @@ -450,15 +478,16 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & } else { - data_types = readRowAndGetDataTypes(); + auto data_types_maybe = readRowAndGetDataTypes(); /// Data contains only 2 rows. - if (data_types.empty()) + if (!data_types_maybe) { second_row_can_be_type_names = false; data_types = second_row_types; } else { + data_types = *data_types_maybe; ++row; } } @@ -490,10 +519,10 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & return; } - auto next_row_types = readRowAndGetDataTypes(); + auto next_row_types_maybe = readRowAndGetDataTypes(); /// Check if there are no more rows in data. It means that all rows contains only String values and Nulls, /// so, the first two rows with all String elements can be real data and we cannot use them as a header. - if (next_row_types.empty()) + if (!next_row_types_maybe) { /// Buffer first data types from the first row, because it doesn't contain Nulls. buffered_types = first_row_types; @@ -502,11 +531,11 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & ++row; /// Combine types from current row and from previous rows. - chooseResultColumnTypes(*this, data_types, next_row_types, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row); + chooseResultColumnTypes(*this, data_types, *next_row_types_maybe, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row); } } -DataTypes FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes() +std::optional FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes() { /// Check if we tried to detect a header and have buffered types from read rows. if (!buffered_types.empty()) diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index b5103d3db39..c263b3b9666 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -56,6 +56,8 @@ protected: private: bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + size_t countRows(size_t max_block_size) override; + bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; @@ -64,11 +66,11 @@ private: bool is_binary; bool with_names; bool with_types; - std::unique_ptr format_reader; bool try_detect_header; bool is_header_detected = false; protected: + std::unique_ptr format_reader; Block::NameMap column_indexes_by_names; }; @@ -109,6 +111,16 @@ public: /// Skip the whole row with types. 
virtual void skipTypes() = 0; + virtual size_t countRows(size_t /*max_block_size*/) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method countRows is not implemented for format reader"); + } + + virtual void skipRow() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method skipRow is not implemented for format reader"); + } + /// Skip delimiters, if any. virtual void skipPrefixBeforeHeader() {} virtual void skipRowStartDelimiter() {} @@ -119,9 +131,10 @@ public: /// Check suffix. virtual bool checkForSuffix() { return in->eof(); } + /// Check if we are at the end of row, not between fields. virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); } - virtual bool allowVariableNumberOfColumns() { return false; } + virtual bool allowVariableNumberOfColumns() const { return false; } const FormatSettings & getFormatSettings() const { return format_settings; } @@ -160,15 +173,15 @@ public: NamesAndTypesList readSchema() override; protected: - virtual DataTypes readRowAndGetDataTypes() override; + virtual std::optional readRowAndGetDataTypes() override; - virtual DataTypes readRowAndGetDataTypesImpl() + virtual std::optional readRowAndGetDataTypesImpl() { throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypesImpl is not implemented"}; } - /// Return column fields with inferred types. In case of no more rows, return empty vectors. - virtual std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() + /// Return column fields with inferred types. In case of no more rows, return nullopt. + virtual std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() { throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetFieldsAndDataTypes is not implemented"}; } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 00d5b2ee089..4136fc5a5f2 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -14,14 +13,6 @@ IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_ { } -InputPort * IAccumulatingTransform::addTotalsPort() -{ - if (inputs.size() > 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals port was already added to IAccumulatingTransform"); - - return &inputs.emplace_back(getInputPort().getHeader(), this); -} - IAccumulatingTransform::Status IAccumulatingTransform::prepare() { /// Check can output. diff --git a/src/Processors/IAccumulatingTransform.h b/src/Processors/IAccumulatingTransform.h index b51753199c3..67063da4e11 100644 --- a/src/Processors/IAccumulatingTransform.h +++ b/src/Processors/IAccumulatingTransform.h @@ -36,10 +36,6 @@ public: Status prepare() override; void work() override; - /// Adds additional port for totals. - /// If added, totals will have been ready by the first generate() call (in totals chunk). 
-    InputPort * addTotalsPort();
-
     InputPort & getInputPort() { return input; }
     OutputPort & getOutputPort() { return output; }
 };
diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp
index 8b160153733..2f294a32531 100644
--- a/src/Processors/IProcessor.cpp
+++ b/src/Processors/IProcessor.cpp
@@ -40,5 +40,10 @@ std::string IProcessor::statusToName(Status status)
     UNREACHABLE();
 }
 
+ProcessorPtr IProcessor::getPartialResultProcessorPtr(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms)
+{
+    return current_processor->getPartialResultProcessor(current_processor, partial_result_limit, partial_result_duration_ms);
+}
+
 }
diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h
index 34322acb2af..51a0bb1c121 100644
--- a/src/Processors/IProcessor.h
+++ b/src/Processors/IProcessor.h
@@ -164,6 +164,8 @@ public:
     static std::string statusToName(Status status);
 
+    static ProcessorPtr getPartialResultProcessorPtr(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms);
+
     /** Method 'prepare' is responsible for all cheap ("instantaneous": O(1) of data volume, no wait) calculations.
       *
       * It may access input and output ports,
@@ -235,6 +237,22 @@ public:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'expandPipeline' is not implemented for {} processor", getName());
     }
 
+    enum class PartialResultStatus
+    {
+        /// Processor currently doesn't support work with the partial result pipeline.
+        NotSupported,
+
+        /// Processor can be skipped in the partial result pipeline.
+        SkipSupported,
+
+        /// Processor creates a light-weight copy of itself in the partial result pipeline.
+        /// The copy can create snapshots of the original processor or transform small blocks of data in the same way as the original processor
+        FullSupported,
+    };
+
+    virtual bool isPartialResultProcessor() const { return false; }
+    virtual PartialResultStatus getPartialResultProcessorSupportStatus() const { return PartialResultStatus::NotSupported; }
+
     /// In case if query was cancelled executor will wait till all processors finish their jobs.
     /// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o).
bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); } @@ -343,6 +361,7 @@ public: uint64_t read_rows = 0; uint64_t read_bytes = 0; uint64_t total_rows_approx = 0; + uint64_t total_bytes = 0; }; struct ReadProgress @@ -368,6 +387,11 @@ public: protected: virtual void onCancel() {} + virtual ProcessorPtr getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getPartialResultProcessor' is not implemented for {} processor", getName()); + } + private: /// For: /// - elapsed_us diff --git a/src/Processors/ISource.cpp b/src/Processors/ISource.cpp index 6a88d3973a1..68749c47453 100644 --- a/src/Processors/ISource.cpp +++ b/src/Processors/ISource.cpp @@ -66,12 +66,14 @@ void ISource::progress(size_t read_rows, size_t read_bytes) { //std::cerr << "========= Progress " << read_rows << " from " << getName() << std::endl << StackTrace().toString() << std::endl; read_progress_was_set = true; + std::lock_guard lock(read_progress_mutex); read_progress.read_rows += read_rows; read_progress.read_bytes += read_bytes; } std::optional ISource::getReadProgress() { + std::lock_guard lock(read_progress_mutex); if (finished && read_progress.read_bytes == 0 && read_progress.total_rows_approx == 0) return {}; @@ -85,6 +87,18 @@ std::optional ISource::getReadProgress() return ReadProgress{res_progress, empty_limits}; } +void ISource::addTotalRowsApprox(size_t value) +{ + std::lock_guard lock(read_progress_mutex); + read_progress.total_rows_approx += value; +} + +void ISource::addTotalBytes(size_t value) +{ + std::lock_guard lock(read_progress_mutex); + read_progress.total_bytes += value; +} + void ISource::work() { try diff --git a/src/Processors/ISource.h b/src/Processors/ISource.h index 292f79ba348..767a73d0924 100644 --- a/src/Processors/ISource.h +++ b/src/Processors/ISource.h @@ -2,6 +2,9 @@ #include +#include +#include + namespace DB { @@ -9,8 +12,9 @@ namespace DB class ISource : public IProcessor { private: + std::mutex read_progress_mutex; ReadProgressCounters read_progress; - bool read_progress_was_set = false; + std::atomic_bool read_progress_was_set = false; bool auto_progress; protected: @@ -42,7 +46,8 @@ public: /// Default implementation for all the sources. 
     std::optional getReadProgress() final;
 
-    void addTotalRowsApprox(size_t value) { read_progress.total_rows_approx += value; }
+    void addTotalRowsApprox(size_t value);
+    void addTotalBytes(size_t value);
 };
 
 using SourcePtr = std::shared_ptr;
diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp
index 5e24062d67a..b2bf3c28eee 100644
--- a/src/Processors/LimitTransform.cpp
+++ b/src/Processors/LimitTransform.cpp
@@ -1,5 +1,5 @@
 #include
-
+#include
 
 namespace DB
 {
@@ -180,7 +180,6 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data)
         return Status::NeedData;
 
     data.current_chunk = input.pull(true);
-
     auto rows = data.current_chunk.getNumRows();
 
     if (rows_before_limit_at_least && !data.input_port_has_counter)
@@ -367,5 +366,11 @@ bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort
     return true;
 }
 
+ProcessorPtr LimitTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms)
+{
+    const auto & header = inputs.front().getHeader();
+    return std::make_shared(header, partial_result_limit, partial_result_duration_ms, limit, offset);
+}
+
 }
diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h
index 33ff968985f..cfacc9634f9 100644
--- a/src/Processors/LimitTransform.h
+++ b/src/Processors/LimitTransform.h
@@ -55,6 +55,8 @@ private:
     ColumnRawPtrs extractSortColumns(const Columns & columns) const;
     bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const;
 
+    ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override;
+
 public:
     LimitTransform(
         const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams = 1,
@@ -73,6 +75,14 @@ public:
     void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_at_least.swap(counter); }
     void setInputPortHasCounter(size_t pos) { ports_data[pos].input_port_has_counter = true; }
+
+    PartialResultStatus getPartialResultProcessorSupportStatus() const override
+    {
+        /// Currently LimitPartialResultTransform supports only single-thread work.
+        bool is_partial_result_supported = inputs.size() == 1 && outputs.size() == 1;
+
+        return is_partial_result_supported ?
PartialResultStatus::FullSupported : PartialResultStatus::NotSupported; + } }; } diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 418bf5e3f13..a3ff7a4ef87 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -163,12 +163,8 @@ static bool compareRetentions(const Retention & a, const Retention & b) { return false; } - String error_msg = "age and precision should only grow up: " - + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " - + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception::createDeprecated( - error_msg, - DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Age and precision should only grow up: {}:{} vs {}:{}", + a.age, a.precision, b.age, b.precision); } bool operator==(const Retention & a, const Retention & b) diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index db770de858c..4d2443b1e46 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -21,10 +21,14 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes, - bool cleanup_) + bool cleanup_, + size_t * cleanedup_rows_count_) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) + , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes) + , cleanup(cleanup_) + , cleanedup_rows_count(cleanedup_rows_count_) { + if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); if (!version_column.empty()) @@ -74,10 +78,13 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// Write the data for the previous primary key. if (!selected_row.empty()) { - if (is_deleted_column_number!=-1) + if (is_deleted_column_number != -1) { - if (!(cleanup && assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num])) + uint8_t value = assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; + if (!cleanup || !value) insertRow(); + else if (cleanedup_rows_count != nullptr) + *cleanedup_rows_count += current_row_sources.size(); } else insertRow(); @@ -91,7 +98,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (out_row_sources_buf) current_row_sources.emplace_back(current.impl->order, true); - if ((is_deleted_column_number!=-1)) + if (is_deleted_column_number != -1) { const UInt8 is_deleted = assert_cast(*current->all_columns[is_deleted_column_number]).getData()[current->getRow()]; if ((is_deleted != 1) && (is_deleted != 0)) @@ -129,10 +136,13 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// We will write the data for the last primary key. 
if (!selected_row.empty()) { - if (is_deleted_column_number!=-1) + if (is_deleted_column_number != -1) { - if (!(cleanup && assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num])) + uint8_t value = assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; + if (!cleanup || !value) insertRow(); + else if (cleanedup_rows_count != nullptr) + *cleanedup_rows_count += current_row_sources.size(); } else insertRow(); diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 4d8de55b032..6ee138aca88 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -27,7 +27,8 @@ public: size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false, - bool cleanup = false); + bool cleanup = false, + size_t * cleanedup_rows_count = nullptr); Status merge() override; @@ -37,6 +38,7 @@ private: ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; bool cleanup = false; + size_t * cleanedup_rows_count = nullptr; using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 2; /// last, current. diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 9cd2f29a862..7e293db1aa8 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -19,7 +19,8 @@ public: size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false, - bool cleanup = false) + bool cleanup = false, + size_t * cleanedup_rows_count = nullptr) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, @@ -31,7 +32,8 @@ public: max_block_size_bytes, out_row_sources_buf_, use_average_block_sizes, - cleanup) + cleanup, + cleanedup_rows_count) { } diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp index fb3ed7f80fc..60ac30389a1 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp @@ -9,7 +9,12 @@ namespace DB BuildQueryPipelineSettings BuildQueryPipelineSettings::fromContext(ContextPtr from) { BuildQueryPipelineSettings settings; - settings.actions_settings = ExpressionActionsSettings::fromSettings(from->getSettingsRef(), CompileExpressions::yes); + + const auto & context_settings = from->getSettingsRef(); + settings.partial_result_limit = context_settings.max_rows_in_partial_result; + settings.partial_result_duration_ms = context_settings.partial_result_update_duration_ms.totalMilliseconds(); + + settings.actions_settings = ExpressionActionsSettings::fromSettings(context_settings, CompileExpressions::yes); settings.process_list_element = from->getProcessListElement(); settings.progress_callback = from->getProgressCallback(); return settings; diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index 3b5e4e06953..0410bf925d1 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -19,6 +19,9 @@ struct BuildQueryPipelineSettings QueryStatusPtr process_list_element; 
ProgressCallback progress_callback = nullptr; + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; + const ExpressionActionsSettings & getActionsSettings() const { return actions_settings; } static BuildQueryPipelineSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index c54d32c1385..ca46f92eeb4 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -141,7 +141,7 @@ void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pi /// Add balancing transform auto idx = position == JoinTableSide::Left ? PingPongProcessor::First : PingPongProcessor::Second; auto stream_balancer = std::make_shared(input_header, num_ports, max_rows_in_set, idx); - stream_balancer->setDescription(getStepDescription()); + stream_balancer->setDescription("Reads rows from two streams evenly"); /// Regular inputs just bypass data for respective ports connectAllInputs(ports, stream_balancer->getInputs(), num_ports); @@ -163,7 +163,7 @@ void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pi { auto & port = *output_it++; auto transform = std::make_shared(port.getHeader(), column_names, filtering_set); - transform->setDescription(this->getStepDescription()); + transform->setDescription("Filter rows using other join table side's set"); connect(port, transform->getInputPort()); result_transforms.emplace_back(std::move(transform)); } @@ -201,5 +201,9 @@ void CreateSetAndFilterOnTheFlyStep::updateOutputStream() output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); } +bool CreateSetAndFilterOnTheFlyStep::isColumnPartOfSetKey(const String & column_name) const +{ + return std::find(column_names.begin(), column_names.end(), column_name) != column_names.end(); +} } diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h index b363991c2f6..023901dba02 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h @@ -35,6 +35,8 @@ public: SetWithStatePtr getSet() const { return own_set; } + bool isColumnPartOfSetKey(const String & column_name) const; + /// Set for another stream. 
void setFiltering(SetWithStatePtr filtering_set_) { filtering_set = filtering_set_; } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index b251eec2d28..bacfb7e352e 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -44,11 +44,7 @@ std::unique_ptr createLocalPlan( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t shard_num, - size_t shard_count, - size_t replica_num, - size_t replica_count, - std::shared_ptr coordinator, - UUID group_uuid) + size_t shard_count) { checkStackSize(); @@ -67,26 +63,6 @@ std::unique_ptr createLocalPlan( .setShardInfo(static_cast(shard_num), static_cast(shard_count)) .ignoreASTOptimizations(); - /// There are much things that are needed for coordination - /// during reading with parallel replicas - if (coordinator) - { - new_context->parallel_reading_coordinator = coordinator; - new_context->setClientInterface(ClientInfo::Interface::LOCAL); - new_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); - new_context->setReplicaInfo(true, replica_count, replica_num); - new_context->setConnectionClientVersion(DBMS_VERSION_MAJOR, DBMS_VERSION_MINOR, DBMS_VERSION_PATCH, DBMS_TCP_PROTOCOL_VERSION); - new_context->setParallelReplicasGroupUUID(group_uuid); - new_context->setMergeTreeAllRangesCallback([coordinator](InitialAllRangesAnnouncement announcement) - { - coordinator->handleInitialAllRangesAnnouncement(announcement); - }); - new_context->setMergeTreeReadTaskCallback([coordinator](ParallelReadRequest request) -> std::optional - { - return coordinator->handleRequest(request); - }); - } - if (context->getSettingsRef().allow_experimental_analyzer) { auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h index c08b9bdf67e..1f62d05b8de 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h @@ -19,10 +19,5 @@ std::unique_ptr createLocalPlan( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t shard_num, - size_t shard_count, - size_t replica_num, - size_t replica_count, - std::shared_ptr coordinator, - UUID group_uuid = UUIDHelpers::Nil); - + size_t shard_count); } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index afdff44020f..b132d27670d 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -30,7 +30,7 @@ static Block checkHeaders(const DataStreams & input_streams_) } IntersectOrExceptStep::IntersectOrExceptStep( - DataStreams input_streams_ , Operator operator_ , size_t max_threads_) + DataStreams input_streams_, Operator operator_, size_t max_threads_) : header(checkHeaders(input_streams_)) , current_operator(operator_) , max_threads(max_threads_) diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 2ff8f161e99..63a5eeb51d2 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -54,7 +57,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines bool JoinStep::allowPushDownToRight() 
const { - return join->pipelineType() == JoinPipelineType::YShaped; + return join->pipelineType() == JoinPipelineType::YShaped || join->pipelineType() == JoinPipelineType::FillRightFirst; } void JoinStep::describePipeline(FormatSettings & settings) const @@ -62,6 +65,36 @@ void JoinStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } +void JoinStep::describeActions(FormatSettings & settings) const +{ + String prefix(settings.offset, ' '); + + const auto & table_join = join->getTableJoin(); + settings.out << prefix << "Type: " << toString(table_join.kind()) << '\n'; + settings.out << prefix << "Strictness: " << toString(table_join.strictness()) << '\n'; + settings.out << prefix << "Algorithm: " << join->getName() << '\n'; + + if (table_join.strictness() == JoinStrictness::Asof) + settings.out << prefix << "ASOF inequality: " << toString(table_join.getAsofInequality()) << '\n'; + + if (!table_join.getClauses().empty()) + settings.out << prefix << "Clauses: " << table_join.formatClauses(table_join.getClauses(), true /*short_format*/) << '\n'; +} + +void JoinStep::describeActions(JSONBuilder::JSONMap & map) const +{ + const auto & table_join = join->getTableJoin(); + map.add("Type", toString(table_join.kind())); + map.add("Strictness", toString(table_join.strictness())); + map.add("Algorithm", join->getName()); + + if (table_join.strictness() == JoinStrictness::Asof) + map.add("ASOF inequality", toString(table_join.getAsofInequality())); + + if (!table_join.getClauses().empty()) + map.add("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/)); +} + void JoinStep::updateInputStream(const DataStream & new_input_stream_, size_t idx) { if (idx == 0) diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index e7185f36588..369ee9bec8b 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -27,6 +27,9 @@ public: void describePipeline(FormatSettings & settings) const override; + void describeActions(JSONBuilder::JSONMap & map) const override; + void describeActions(FormatSettings & settings) const override; + const JoinPtr & getJoin() const { return join; } bool allowPushDownToRight() const; diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 6ecec1359c5..2230e50425c 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -16,7 +16,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them. /// After that it add CreateSetsStep for the subqueries that has not be used in the filters. -void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes); +void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Optimization (first pass) is a function applied to QueryPlan::Node. /// It can read and update subtree of specified node. 
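For readers skimming this header diff: a first-pass optimization here is just a free function over a plan node, as the comment above and the `tryPushDownFilter` hunk further down suggest. A minimal sketch under that assumption follows; the name `tryMyOptimization` and its no-op body are hypothetical illustrations, not part of this patch.

```cpp
#include <Processors/QueryPlan/QueryPlan.h>

namespace DB::QueryPlanOptimizations
{

/// Illustrative sketch only: a first-pass optimization inspects one node, may
/// rewrite its subtree using the shared node list, and returns how many plan
/// steps were updated (0 means nothing changed at this node).
size_t tryMyOptimization(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
    /// A real optimization would look at parent_node->step and
    /// parent_node->children, possibly allocate replacement nodes from
    /// `nodes`, and rewire the children before reporting what it changed.
    (void)parent_node;
    (void)nodes;
    return 0;
}

}
```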
@@ -113,7 +113,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); -bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes); +bool addPlansForSets(QueryPlan & plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes); /// Enable memory bound merging of aggregation states for remote queries /// in case it was enabled for local plan diff --git a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp index e9100ae9d02..47df05301c9 100644 --- a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp +++ b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp @@ -6,7 +6,7 @@ namespace DB::QueryPlanOptimizations { -bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes) +bool addPlansForSets(QueryPlan & root_plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto * delayed = typeid_cast(node.step.get()); if (!delayed) @@ -23,7 +23,9 @@ bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { input_streams.push_back(plan->getCurrentDataStream()); node.children.push_back(plan->getRootNode()); - nodes.splice(nodes.end(), QueryPlan::detachNodes(std::move(*plan))); + auto [add_nodes, add_resources] = QueryPlan::detachNodesAndResources(std::move(*plan)); + nodes.splice(nodes.end(), std::move(add_nodes)); + root_plan.addResources(std::move(add_resources)); } auto creating_sets = std::make_unique(std::move(input_streams)); diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 4336de41b7b..3b31a809f9d 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -341,6 +341,10 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind) return 0; + /// There is no ASOF Right join, so we're talking about pushing to the right side + if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) + return 0; + bool is_left = kind == JoinKind::Left; const auto & input_header = is_left ? 
child->getInputStreams().front().header : child->getInputStreams().back().header; const auto & res_header = child->getOutputStream().header; @@ -424,8 +428,15 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return updated_steps; } - if (auto updated_steps = simplePushDownOverStep(parent_node, nodes, child)) - return updated_steps; + if (const auto * join_filter_set_step = typeid_cast(child.get())) + { + const auto & filter_column_name = assert_cast(parent_node->step.get())->getFilterColumnName(); + bool can_remove_filter = !join_filter_set_step->isColumnPartOfSetKey(filter_column_name); + + Names allowed_inputs = child->getOutputStream().header.getNames(); + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, can_remove_filter)) + return updated_steps; + } if (auto * union_step = typeid_cast(child.get())) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index b13dda9a8f0..0caedff67a5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -181,7 +181,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s "No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1"); } -void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes) +void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { Stack stack; stack.push_back({.node = &root}); @@ -205,7 +205,7 @@ void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes) source_step_with_filter->applyFilters(); } - addPlansForSets(*frame.node, nodes); + addPlansForSets(plan, *frame.node, nodes); stack.pop_back(); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index eab4d3f5d43..0599a0fa369 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -143,12 +143,12 @@ std::optional matchAggregateFunctions( argument_types.clear(); const auto & candidate = info.aggregates[idx]; - /// Note: this check is a bit strict. - /// We check that aggregate function names, argument types and parameters are equal. /// In some cases it's possible only to check that states are equal, /// e.g. for quantile(0.3)(...) and quantile(0.5)(...). - /// But also functions sum(...) and sumIf(...) will have equal states, - /// and we can't replace one to another from projection. + /// + /// Note we already checked that aggregate function names are equal, + /// so that functions sum(...) and sumIf(...) with equal states will + /// not match. 
if (!candidate.function->getStateType()->equals(*aggregate.function->getStateType())) { // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Cannot match agg func {} vs {} by state {} vs {}", @@ -249,12 +249,24 @@ static void appendAggregateFunctions( auto & input = inputs[match.description]; if (!input) - input = &proj_dag.addInput(match.description->column_name, std::move(type)); + input = &proj_dag.addInput(match.description->column_name, type); const auto * node = input; if (node->result_name != aggregate.column_name) - node = &proj_dag.addAlias(*node, aggregate.column_name); + { + if (DataTypeAggregateFunction::strictEquals(type, node->result_type)) + { + node = &proj_dag.addAlias(*node, aggregate.column_name); + } + else + { + /// Cast to aggregate types specified in query if it's not + /// strictly the same as the one specified in projection. This + /// is required to generate correct results during finalization. + node = &proj_dag.addCast(*node, type, aggregate.column_name); + } + } proj_dag_outputs.push_back(node); } diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 533fbde1e13..61c6422de5a 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -254,6 +254,32 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static void reorderColumns(ActionsDAG & dag, const Block & header, const std::string & filter_column) +{ + std::unordered_map inputs_map; + for (const auto * input : dag.getInputs()) + inputs_map[input->result_name] = input; + + for (const auto & col : header) + { + auto & input = inputs_map[col.name]; + if (!input) + input = &dag.addInput(col); + } + + ActionsDAG::NodeRawConstPtrs new_outputs; + new_outputs.reserve(header.columns() + 1); + + new_outputs.push_back(&dag.findInOutputs(filter_column)); + for (const auto & col : header) + { + auto & input = inputs_map[col.name]; + new_outputs.push_back(input); + } + + dag.getOutputs() = std::move(new_outputs); +} + Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, ExpressionActionsPtr sorting_expr, @@ -279,6 +305,7 @@ Pipes buildPipesForReadingByPKRanges( continue; auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes()); auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false); + reorderColumns(*actions, pipes[i].getHeader(), filter_function->getColumnName()); ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in [{}, {})", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? 
::toString(borders[i]) : "+inf"); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 687260441ff..5d38bfb42c4 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -168,6 +168,7 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( QueryPipelineBuilderPtr last_pipeline; + bool has_partial_result_setting = build_pipeline_settings.partial_result_duration_ms > 0; std::stack stack; stack.push(Frame{.node = root}); @@ -195,6 +196,9 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( } else stack.push(Frame{.node = frame.node->children[next_child]}); + + if (has_partial_result_setting && last_pipeline && !last_pipeline->isPartialResultActive()) + last_pipeline->activatePartialResult(build_pipeline_settings.partial_result_limit, build_pipeline_settings.partial_result_duration_ms); } last_pipeline->setProgressCallback(build_pipeline_settings.progress_callback); @@ -482,7 +486,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); - QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes); + QueryPlanOptimizations::optimizeTreeThirdPass(*this, *root, nodes); updateDataStreams(*root); } @@ -542,9 +546,9 @@ void QueryPlan::explainEstimate(MutableColumns & columns) } } -QueryPlan::Nodes QueryPlan::detachNodes(QueryPlan && plan) +std::pair QueryPlan::detachNodesAndResources(QueryPlan && plan) { - return std::move(plan.nodes); + return {std::move(plan.nodes), std::move(plan.resources)}; } } diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index d89bdc534be..62d658ddccd 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -98,6 +98,9 @@ public: void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } size_t getMaxThreads() const { return max_threads; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + bool getConcurrencyControl() const { return concurrency_control; } + /// Tree node. Step and it's children. struct Node { @@ -108,7 +111,7 @@ public: using Nodes = std::list; Node * getRootNode() const { return root; } - static Nodes detachNodes(QueryPlan && plan); + static std::pair detachNodesAndResources(QueryPlan && plan); private: QueryPlanResourceHolder resources; @@ -120,6 +123,7 @@ private: /// Those fields are passed to QueryPipeline. 
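The new partial-result hook in QueryPlan::buildQueryPipeline activates partial results on each freshly built child pipeline at most once, and only when partial_result_duration_ms is non-zero. A minimal standalone model of that gating, with a fake pipeline type instead of QueryPipelineBuilder (illustrative only):

```cpp
#include <cstdint>
#include <iostream>

struct FakePipeline
{
    bool partial_result_active = false;
    uint64_t limit = 0;
    uint64_t duration_ms = 0;

    bool isPartialResultActive() const { return partial_result_active; }
    void activatePartialResult(uint64_t limit_, uint64_t duration_ms_)
    {
        partial_result_active = true;
        limit = limit_;
        duration_ms = duration_ms_;
    }
};

// Mirrors the added check: only when the setting is enabled (duration > 0),
// a pipeline already exists, and it has not been activated yet.
void maybeActivatePartialResult(FakePipeline * last_pipeline, uint64_t partial_result_limit, uint64_t partial_result_duration_ms)
{
    bool has_partial_result_setting = partial_result_duration_ms > 0;
    if (has_partial_result_setting && last_pipeline && !last_pipeline->isPartialResultActive())
        last_pipeline->activatePartialResult(partial_result_limit, partial_result_duration_ms);
}

int main()
{
    FakePipeline pipeline;
    maybeActivatePartialResult(&pipeline, /*limit=*/10, /*duration_ms=*/100);
    std::cout << pipeline.isPartialResultActive() << '\n'; // 1
}
```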
size_t max_threads = 0; + bool concurrency_control = false; }; std::string debugExplainStep(const IQueryPlanStep & step); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 2d2412f7e36..b201909c4f4 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -23,12 +23,12 @@ #include #include #include -#include #include -#include #include +#include +#include +#include #include -#include #include #include #include @@ -137,6 +137,69 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) return true; } +/// build sort description for output stream +static void updateSortDescriptionForOutputStream( + DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info) +{ + /// Updating sort description can be done after PREWHERE actions are applied to the header. + /// After PREWHERE actions are applied, column names in header can differ from storage column names due to aliases + /// To mitigate it, we're trying to build original header and use it to deduce sorting description + /// TODO: this approach is fragile, it'd be more robust to update sorting description for the whole plan during plan optimization + Block original_header = output_stream.header.cloneEmpty(); + if (prewhere_info) + { + if (prewhere_info->prewhere_actions) + { + FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions); + for (auto & column : original_header) + { + const auto * original_node = original_column_finder.find(column.name); + if (original_node) + column.name = original_node->result_name; + } + } + + if (prewhere_info->row_level_filter) + { + FindOriginalNodeForOutputName original_column_finder(prewhere_info->row_level_filter); + for (auto & column : original_header) + { + const auto * original_node = original_column_finder.find(column.name); + if (original_node) + column.name = original_node->result_name; + } + } + } + + SortDescription sort_description; + const Block & header = output_stream.header; + for (const auto & sorting_key : sorting_key_columns) + { + const auto it = std::find_if( + original_header.begin(), original_header.end(), [&sorting_key](const auto & column) { return column.name == sorting_key; }); + if (it == original_header.end()) + break; + + const size_t column_pos = std::distance(original_header.begin(), it); + sort_description.emplace_back((header.begin() + column_pos)->name, sort_direction); + } + + if (!sort_description.empty()) + { + if (input_order_info) + { + output_stream.sort_scope = DataStream::SortScope::Stream; + const size_t used_prefix_of_sorting_key_size = input_order_info->used_prefix_of_sorting_key_size; + if (sort_description.size() > used_prefix_of_sorting_key_size) + sort_description.resize(used_prefix_of_sorting_key_size); + } + else + output_stream.sort_scope = DataStream::SortScope::Chunk; + } + + output_stream.sort_description = std::move(sort_description); +} + void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const { @@ -188,7 +251,7 @@ ReadFromMergeTree::ReadFromMergeTree( Poco::Logger * log_, MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading) - : SourceStepWithFilter(DataStream{.header = IMergeTreeSelectAlgorithm::transformHeader( + : SourceStepWithFilter(DataStream{.header =
MergeTreeSelectProcessor::transformHeader( storage_snapshot_->getSampleBlockForColumns(real_column_names_), getPrewhereInfoFromQueryInfo(query_info_), data_.getPartitionValueType(), @@ -205,10 +268,11 @@ ReadFromMergeTree::ReadFromMergeTree( , storage_snapshot(std::move(storage_snapshot_)) , metadata_for_reading(storage_snapshot->getMetadataForQuery()) , context(std::move(context_)) - , max_block_size(max_block_size_) + , block_size{ + .max_block_size_rows = max_block_size_, + .preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes, + .preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes} , requested_num_streams(num_streams_) - , preferred_block_size_bytes(context->getSettingsRef().preferred_block_size_bytes) - , preferred_max_column_in_block_size_bytes(context->getSettingsRef().preferred_max_column_in_block_size_bytes) , sample_factor_column_queried(sample_factor_column_queried_) , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) , log(log_) @@ -218,7 +282,7 @@ ReadFromMergeTree::ReadFromMergeTree( if (sample_factor_column_queried) { /// Only _sample_factor virtual column is added by ReadFromMergeTree - /// Other virtual columns are added by MergeTreeBaseSelectProcessor. + /// Other virtual columns are added by MergeTreeSelectProcessor. auto type = std::make_shared(); output_stream->header.insert({type->createColumn(), type, "_sample_factor"}); } @@ -250,83 +314,62 @@ ReadFromMergeTree::ReadFromMergeTree( /// Add explicit description. setStepDescription(data.getStorageID().getFullNameNotQuoted()); - { /// build sort description for output stream - SortDescription sort_description; - const Names & sorting_key_columns = metadata_for_reading->getSortingKeyColumns(); - const Block & header = output_stream->header; - const int sort_direction = getSortDirection(); - for (const auto & column_name : sorting_key_columns) - { - if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) - == header.end()) - break; - sort_description.emplace_back(column_name, sort_direction); - } - if (!sort_description.empty()) - { - if (query_info.getInputOrderInfo()) - { - output_stream->sort_scope = DataStream::SortScope::Stream; - const size_t used_prefix_of_sorting_key_size = query_info.getInputOrderInfo()->used_prefix_of_sorting_key_size; - if (sort_description.size() > used_prefix_of_sorting_key_size) - sort_description.resize(used_prefix_of_sorting_key_size); - } - else - output_stream->sort_scope = DataStream::SortScope::Chunk; - } - - output_stream->sort_description = std::move(sort_description); - } + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo(), + prewhere_info); } Pipe ReadFromMergeTree::readFromPoolParallelReplicas( RangesInDataParts parts_with_range, Names required_columns, - size_t max_streams, - size_t min_marks_for_concurrent_read, - bool use_uncompressed_cache -) + PoolSettings pool_settings) { const auto & client_info = context->getClientInfo(); + auto extension = ParallelReadingExtension { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), .count_participating_replicas = client_info.count_participating_replicas, .number_of_current_replica = client_info.number_of_current_replica, - .columns_to_read = required_columns + .columns_to_read = required_columns, }; /// We have a 
special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. - min_marks_for_concurrent_read = static_cast(min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier); + pool_settings.min_marks_for_concurrent_read = static_cast(pool_settings.min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier); + size_t total_rows = parts_with_range.getRowsCountAllParts(); auto pool = std::make_shared( + std::move(extension), + std::move(parts_with_range), storage_snapshot, - max_streams, - extension, - parts_with_range, prewhere_info, actions_settings, reader_settings, required_columns, virt_column_names, - min_marks_for_concurrent_read); + pool_settings, + context); + + auto block_size_copy = block_size; + block_size_copy.min_marks_to_read = pool_settings.min_marks_for_concurrent_read; Pipes pipes; - const auto & settings = context->getSettingsRef(); - size_t total_rows = parts_with_range.getRowsCountAllParts(); - for (size_t i = 0; i < max_streams; ++i) + for (size_t i = 0; i < pool_settings.threads; ++i) { - auto algorithm = std::make_unique( - i, pool, min_marks_for_concurrent_read, max_block_size, - settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, - data, storage_snapshot, use_uncompressed_cache, - prewhere_info, actions_settings, reader_settings, virt_column_names); + auto algorithm = std::make_unique(i); - auto source = std::make_shared(std::move(algorithm)); + auto processor = std::make_unique( + pool, std::move(algorithm), data, prewhere_info, + actions_settings, block_size_copy, reader_settings, virt_column_names); + + auto source = std::make_shared(std::move(processor)); /// Set the approximate number of rows for the first source only /// In case of parallel processing on replicas do not set approximate rows at all. 
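Before the shared pool is created, the local replica scales down its task size: min_marks_for_concurrent_read is multiplied by parallel_replicas_single_task_marks_count_multiplier, since the local replica also merges aggregate states and does other work besides reading. A trivial standalone rendering of that scaling (the 0.5 multiplier below is an example value, not a default):

```cpp
#include <cstddef>
#include <iostream>

// Mirrors the cast-and-multiply applied in readFromPoolParallelReplicas().
size_t scaleMinMarks(size_t min_marks_for_concurrent_read, double multiplier)
{
    return static_cast<size_t>(min_marks_for_concurrent_read * multiplier);
}

int main()
{
    // With 24 marks per task and a 0.5 multiplier the local replica takes 12-mark tasks.
    std::cout << scaleMinMarks(24, 0.5) << '\n'; // 12
}
```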
@@ -345,12 +388,8 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( Pipe ReadFromMergeTree::readFromPool( RangesInDataParts parts_with_range, Names required_columns, - size_t max_streams, - size_t min_marks_for_concurrent_read, - bool use_uncompressed_cache) + PoolSettings pool_settings) { - Pipes pipes; - size_t sum_marks = parts_with_range.getMarksCountAllParts(); size_t total_rows = parts_with_range.getRowsCountAllParts(); if (query_info.limit > 0 && query_info.limit < total_rows) @@ -361,11 +400,11 @@ Pipe ReadFromMergeTree::readFromPool( /// round min_marks_to_read up to nearest multiple of block_size expressed in marks /// If granularity is adaptive it doesn't make sense /// Maybe it will make sense to add settings `max_block_size_bytes` - if (max_block_size && !data.canUseAdaptiveGranularity()) + if (block_size.max_block_size_rows && !data.canUseAdaptiveGranularity()) { size_t fixed_index_granularity = data.getSettings()->index_granularity; - min_marks_for_concurrent_read = (min_marks_for_concurrent_read * fixed_index_granularity + max_block_size - 1) - / max_block_size * max_block_size / fixed_index_granularity; + pool_settings.min_marks_for_concurrent_read = (pool_settings.min_marks_for_concurrent_read * fixed_index_granularity + block_size.max_block_size_rows - 1) + / block_size.max_block_size_rows * block_size.max_block_size_rows / fixed_index_granularity; } bool all_parts_are_remote = true; @@ -379,34 +418,30 @@ Pipe ReadFromMergeTree::readFromPool( MergeTreeReadPoolPtr pool; - if ((all_parts_are_remote && settings.allow_prefetched_read_pool_for_remote_filesystem - && MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.remote_fs_method)) - || (all_parts_are_local && settings.allow_prefetched_read_pool_for_local_filesystem - && MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.local_fs_method))) + bool allow_prefetched_remote = all_parts_are_remote + && settings.allow_prefetched_read_pool_for_remote_filesystem + && MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.remote_fs_method); + + bool allow_prefetched_local = all_parts_are_local + && settings.allow_prefetched_read_pool_for_local_filesystem + && MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.local_fs_method); + + if (allow_prefetched_remote || allow_prefetched_local) { pool = std::make_shared( - max_streams, - sum_marks, - min_marks_for_concurrent_read, std::move(parts_with_range), storage_snapshot, prewhere_info, actions_settings, + reader_settings, required_columns, virt_column_names, - settings.preferred_block_size_bytes, - reader_settings, - context, - use_uncompressed_cache, - all_parts_are_remote, - *data.getSettings()); + pool_settings, + context); } else { pool = std::make_shared( - max_streams, - sum_marks, - min_marks_for_concurrent_read, std::move(parts_with_range), storage_snapshot, prewhere_info, @@ -414,22 +449,28 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, virt_column_names, - context, - false); + pool_settings, + context); } - auto * logger = &Poco::Logger::get(data.getLogName() + " (SelectExecutor)"); - LOG_DEBUG(logger, "Reading approx. {} rows with {} streams", total_rows, max_streams); + LOG_DEBUG(log, "Reading approx. 
{} rows with {} streams", total_rows, pool_settings.threads); - for (size_t i = 0; i < max_streams; ++i) + /// The reason why we change this setting is because MergeTreeReadPool takes the full task + /// ignoring min_marks_to_read setting in case of remote disk (see MergeTreeReadPool::getTask). + /// In this case, we won't limit the number of rows to read based on adaptive granularity settings. + auto block_size_copy = block_size; + block_size_copy.min_marks_to_read = pool_settings.min_marks_for_concurrent_read; + + Pipes pipes; + for (size_t i = 0; i < pool_settings.threads; ++i) { - auto algorithm = std::make_unique( - i, pool, min_marks_for_concurrent_read, max_block_size, - settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, - data, storage_snapshot, use_uncompressed_cache, - prewhere_info, actions_settings, reader_settings, virt_column_names); + auto algorithm = std::make_unique(i); - auto source = std::make_shared(std::move(algorithm)); + auto processor = std::make_unique( + pool, std::move(algorithm), data, prewhere_info, + actions_settings, block_size_copy, reader_settings, virt_column_names); + + auto source = std::make_shared(std::move(processor)); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -443,17 +484,65 @@ Pipe ReadFromMergeTree::readFromPool( return pipe; } -template -ProcessorPtr ReadFromMergeTree::createSource( - const RangesInDataPart & part, - const Names & required_columns, - bool use_uncompressed_cache, - bool has_limit_below_one_block, - MergeTreeInOrderReadPoolParallelReplicasPtr pool) +Pipe ReadFromMergeTree::readInOrder( + RangesInDataParts parts_with_ranges, + Names required_columns, + PoolSettings pool_settings, + ReadType read_type, + UInt64 limit) { - auto total_rows = part.getRowsCount(); - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + /// For reading in order it makes sense to read only + /// one range per task to reduce number of read rows. + bool has_limit_below_one_block = read_type != ReadType::Default && limit && limit < block_size.max_block_size_rows; + MergeTreeReadPoolPtr pool; + + if (is_parallel_reading_from_replicas) + { + const auto & client_info = context->getClientInfo(); + ParallelReadingExtension extension + { + .all_callback = all_ranges_callback.value(), + .callback = read_task_callback.value(), + .count_participating_replicas = client_info.count_participating_replicas, + .number_of_current_replica = client_info.number_of_current_replica, + .columns_to_read = required_columns, + }; + + pool_settings.min_marks_for_concurrent_read = static_cast( + pool_settings.min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier); + + CoordinationMode mode = read_type == ReadType::InOrder + ? CoordinationMode::WithOrder + : CoordinationMode::ReverseOrder; + + pool = std::make_shared( + std::move(extension), + mode, + parts_with_ranges, + storage_snapshot, + prewhere_info, + actions_settings, + reader_settings, + required_columns, + virt_column_names, + pool_settings, + context); + } + else + { + pool = std::make_shared( + has_limit_below_one_block, + read_type, + parts_with_ranges, + storage_snapshot, + prewhere_info, + actions_settings, + reader_settings, + required_columns, + virt_column_names, + pool_settings, + context); + } /// Actually it means that parallel reading from replicas enabled /// and we have to collaborate with initiator. 
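With non-adaptive granularity, readFromPool above rounds min_marks_for_concurrent_read up so a task covers a whole number of blocks: marks are converted to rows, rounded up to a multiple of max_block_size_rows, then converted back to marks. The same integer arithmetic, runnable on its own with example numbers (8192-row granularity and 65536-row blocks are assumptions for the demo):

```cpp
#include <cstddef>
#include <iostream>

// Round min_marks up to the nearest multiple of the block size expressed in marks,
// as done in ReadFromMergeTree::readFromPool() when index granularity is fixed.
size_t roundMinMarksToBlockSize(size_t min_marks, size_t index_granularity, size_t max_block_size_rows)
{
    return (min_marks * index_granularity + max_block_size_rows - 1)
        / max_block_size_rows * max_block_size_rows / index_granularity;
}

int main()
{
    // 3 marks of 8192 rows = 24576 rows; one 65536-row block needs 8 marks, so 3 rounds up to 8.
    std::cout << roundMinMarksToBlockSize(3, 8192, 65536) << '\n'; // 8
}
```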
@@ -462,37 +551,34 @@ ProcessorPtr ReadFromMergeTree::createSource( /// because we don't know actual amount of read rows in case when limit is set. bool set_rows_approx = !is_parallel_reading_from_replicas && !reader_settings.read_in_order; - auto algorithm = std::make_unique( - data, storage_snapshot, part.data_part, part.alter_conversions, max_block_size, preferred_block_size_bytes, - preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info, - actions_settings, reader_settings, pool, virt_column_names, part.part_index_in_query, has_limit_below_one_block); - - auto source = std::make_shared(std::move(algorithm)); - - if (set_rows_approx) - source->addTotalRowsApprox(total_rows); - - return source; -} - -Pipe ReadFromMergeTree::readInOrder( - RangesInDataParts parts_with_range, - Names required_columns, - ReadType read_type, - bool use_uncompressed_cache, - UInt64 limit, - MergeTreeInOrderReadPoolParallelReplicasPtr pool) -{ Pipes pipes; - /// For reading in order it makes sense to read only - /// one range per task to reduce number of read rows. - bool has_limit_below_one_block = read_type != ReadType::Default && limit && limit < max_block_size; - - for (const auto & part : parts_with_range) + for (size_t i = 0; i < parts_with_ranges.size(); ++i) { - auto source = read_type == ReadType::InReverseOrder - ? createSource(part, required_columns, use_uncompressed_cache, has_limit_below_one_block, pool) - : createSource(part, required_columns, use_uncompressed_cache, has_limit_below_one_block, pool); + const auto & part_with_ranges = parts_with_ranges[i]; + + UInt64 total_rows = part_with_ranges.getRowsCount(); + if (query_info.limit > 0 && query_info.limit < total_rows) + total_rows = query_info.limit; + + LOG_TRACE(log, "Reading {} ranges in{}order from part {}, approx. {} rows starting from {}", + part_with_ranges.ranges.size(), + read_type == ReadType::InReverseOrder ? 
" reverse " : " ", + part_with_ranges.data_part->name, total_rows, + part_with_ranges.data_part->index_granularity.getMarkStartingRow(part_with_ranges.ranges.front().begin)); + + MergeTreeSelectAlgorithmPtr algorithm; + if (read_type == ReadType::InReverseOrder) + algorithm = std::make_unique(i); + else + algorithm = std::make_unique(i); + + auto processor = std::make_unique( + pool, std::move(algorithm), data, prewhere_info, + actions_settings, block_size, reader_settings, virt_column_names); + + auto source = std::make_shared(std::move(processor)); + if (set_rows_approx) + source->addTotalRowsApprox(total_rows); pipes.emplace_back(std::move(source)); } @@ -511,16 +597,33 @@ Pipe ReadFromMergeTree::readInOrder( } Pipe ReadFromMergeTree::read( - RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, - size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache) + RangesInDataParts parts_with_range, + Names required_columns, + ReadType read_type, + size_t max_streams, + size_t min_marks_for_concurrent_read, + bool use_uncompressed_cache) { + const auto & settings = context->getSettingsRef(); + size_t sum_marks = parts_with_range.getMarksCountAllParts(); + + PoolSettings pool_settings + { + .threads = max_streams, + .sum_marks = sum_marks, + .min_marks_for_concurrent_read = min_marks_for_concurrent_read, + .preferred_block_size_bytes = settings.preferred_block_size_bytes, + .use_uncompressed_cache = use_uncompressed_cache, + .use_const_size_tasks_for_remote_reading = settings.merge_tree_use_const_size_tasks_for_remote_reading, + }; + if (read_type == ReadType::ParallelReplicas) - return readFromPoolParallelReplicas(parts_with_range, required_columns, max_streams, min_marks_for_concurrent_read, use_uncompressed_cache); + return readFromPoolParallelReplicas(std::move(parts_with_range), std::move(required_columns), std::move(pool_settings)); if (read_type == ReadType::Default && max_streams > 1) - return readFromPool(parts_with_range, required_columns, max_streams, min_marks_for_concurrent_read, use_uncompressed_cache); + return readFromPool(std::move(parts_with_range), std::move(required_columns), std::move(pool_settings)); - auto pipe = readInOrder(parts_with_range, required_columns, read_type, use_uncompressed_cache, /*limit */0, /*pool*/nullptr); + auto pipe = readInOrder(parts_with_range, required_columns, pool_settings, read_type, /*limit=*/ 0); /// Use ConcatProcessor to concat sources together. /// It is needed to read in parts order (and so in PK order) if single thread is used. @@ -543,7 +646,6 @@ struct PartRangesReadInfo size_t index_granularity_bytes = 0; size_t max_marks_to_use_cache = 0; size_t min_marks_for_concurrent_read = 0; - bool use_uncompressed_cache = false; PartRangesReadInfo( @@ -621,8 +723,12 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ auto read_type = is_parallel_reading_from_replicas ? ReadType::ParallelReplicas : ReadType::Default; - return read(std::move(parts_with_ranges), column_names, read_type, - num_streams, info.min_marks_for_concurrent_read, info.use_uncompressed_cache); + return read(std::move(parts_with_ranges), + column_names, + read_type, + num_streams, + info.min_marks_for_concurrent_read, + info.use_uncompressed_cache); } static ActionsDAGPtr createProjection(const Block & header) @@ -673,7 +779,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( /// Let's split ranges to avoid reading much data. 
auto split_ranges - = [rows_granularity = data_settings->index_granularity, my_max_block_size = max_block_size](const auto & ranges, int direction) + = [rows_granularity = data_settings->index_granularity, my_max_block_size = block_size.max_block_size_rows] + (const auto & ranges, int direction) { MarkRanges new_ranges; const size_t max_marks_in_range = (my_max_block_size + rows_granularity - 1) / rows_granularity; @@ -720,109 +827,94 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( const size_t min_marks_per_stream = (info.sum_marks - 1) / num_streams + 1; bool need_preliminary_merge = (parts_with_ranges.size() > settings.read_in_order_two_level_merge_threshold); - std::vector splitted_parts_and_ranges; - splitted_parts_and_ranges.reserve(num_streams); + const auto read_type = input_order_info->direction == 1 ? ReadType::InOrder : ReadType::InReverseOrder; - const auto read_type = input_order_info->direction == 1 - ? ReadFromMergeTree::ReadType::InOrder - : ReadFromMergeTree::ReadType::InReverseOrder; - - MergeTreeInOrderReadPoolParallelReplicasPtr pool; - - if (is_parallel_reading_from_replicas) + PoolSettings pool_settings { - const auto & client_info = context->getClientInfo(); - auto extension = ParallelReadingExtension - { - .all_callback = all_ranges_callback.value(), - .callback = read_task_callback.value(), - .count_participating_replicas = client_info.count_participating_replicas, - .number_of_current_replica = client_info.number_of_current_replica, - .columns_to_read = column_names - }; - - auto min_marks_for_concurrent_read = info.min_marks_for_concurrent_read; - min_marks_for_concurrent_read = static_cast(min_marks_for_concurrent_read * settings.parallel_replicas_single_task_marks_count_multiplier); - - pool = std::make_shared( - parts_with_ranges, - extension, - read_type == ReadFromMergeTree::ReadType::InOrder ? CoordinationMode::WithOrder : CoordinationMode::ReverseOrder, - min_marks_for_concurrent_read); - } - - - for (size_t i = 0; i < num_streams && !parts_with_ranges.empty(); ++i) - { - size_t need_marks = min_marks_per_stream; - RangesInDataParts new_parts; - - /// Loop over parts. - /// We will iteratively take part or some subrange of a part from the back - /// and assign a stream to read from it. - while (need_marks > 0 && !parts_with_ranges.empty()) - { - RangesInDataPart part = parts_with_ranges.back(); - parts_with_ranges.pop_back(); - size_t & marks_in_part = info.sum_marks_in_parts.back(); - - /// We will not take too few rows from a part. - if (marks_in_part >= info.min_marks_for_concurrent_read && need_marks < info.min_marks_for_concurrent_read) - need_marks = info.min_marks_for_concurrent_read; - - /// Do not leave too few rows in the part. - if (marks_in_part > need_marks && marks_in_part - need_marks < info.min_marks_for_concurrent_read) - need_marks = marks_in_part; - - MarkRanges ranges_to_get_from_part; - - /// We take full part if it contains enough marks or - /// if we know limit and part contains less than 'limit' rows. - bool take_full_part = marks_in_part <= need_marks || (input_order_info->limit && input_order_info->limit < part.getRowsCount()); - - /// We take the whole part if it is small enough. - if (take_full_part) - { - ranges_to_get_from_part = part.ranges; - - need_marks -= marks_in_part; - info.sum_marks_in_parts.pop_back(); - } - else - { - /// Loop through ranges in part. Take enough ranges to cover "need_marks". 
- while (need_marks > 0) - { - if (part.ranges.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among streams"); - - MarkRange & range = part.ranges.front(); - - const size_t marks_in_range = range.end - range.begin; - const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); - - ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); - range.begin += marks_to_get_from_range; - marks_in_part -= marks_to_get_from_range; - need_marks -= marks_to_get_from_range; - if (range.begin == range.end) - part.ranges.pop_front(); - } - parts_with_ranges.emplace_back(part); - } - - ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction); - new_parts.emplace_back(part.data_part, part.alter_conversions, part.part_index_in_query, std::move(ranges_to_get_from_part)); - } - - splitted_parts_and_ranges.emplace_back(std::move(new_parts)); - } + .min_marks_for_concurrent_read = info.min_marks_for_concurrent_read, + .preferred_block_size_bytes = settings.preferred_block_size_bytes, + .use_uncompressed_cache = info.use_uncompressed_cache, + }; Pipes pipes; - for (auto & item : splitted_parts_and_ranges) + /// For parallel replicas the split will be performed on the initiator side. + if (is_parallel_reading_from_replicas) { - pipes.emplace_back(readInOrder(std::move(item), column_names, read_type, - info.use_uncompressed_cache, input_order_info->limit, pool)); + pipes.emplace_back(readInOrder(std::move(parts_with_ranges), column_names, pool_settings, read_type, input_order_info->limit)); + } + else + { + std::vector splitted_parts_and_ranges; + splitted_parts_and_ranges.reserve(num_streams); + + for (size_t i = 0; i < num_streams && !parts_with_ranges.empty(); ++i) + { + size_t need_marks = min_marks_per_stream; + RangesInDataParts new_parts; + + /// Loop over parts. + /// We will iteratively take part or some subrange of a part from the back + /// and assign a stream to read from it. + while (need_marks > 0 && !parts_with_ranges.empty()) + { + RangesInDataPart part = parts_with_ranges.back(); + parts_with_ranges.pop_back(); + size_t & marks_in_part = info.sum_marks_in_parts.back(); + + /// We will not take too few rows from a part. + if (marks_in_part >= info.min_marks_for_concurrent_read && need_marks < info.min_marks_for_concurrent_read) + need_marks = info.min_marks_for_concurrent_read; + + /// Do not leave too few rows in the part. + if (marks_in_part > need_marks && marks_in_part - need_marks < info.min_marks_for_concurrent_read) + need_marks = marks_in_part; + + MarkRanges ranges_to_get_from_part; + + /// We take full part if it contains enough marks or + /// if we know limit and part contains less than 'limit' rows. + bool take_full_part = marks_in_part <= need_marks || (input_order_info->limit && input_order_info->limit < part.getRowsCount()); + + /// We take the whole part if it is small enough. + if (take_full_part) + { + ranges_to_get_from_part = part.ranges; + + need_marks -= marks_in_part; + info.sum_marks_in_parts.pop_back(); + } + else + { + /// Loop through ranges in part. Take enough ranges to cover "need_marks". 
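The mark-distribution loop relocated here keeps its old behaviour: each stream greedily takes parts, or tail ranges of a part, from the back until it has roughly min_marks_per_stream marks, while avoiding taking or leaving a sliver smaller than min_marks_for_concurrent_read. A simplified standalone model that distributes bare mark counts and ignores ranges and the limit shortcut:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// Simplified: every part is just a number of marks; a "stream" is the list of
// mark counts assigned to it. The real code additionally tracks ranges inside parts.
std::vector<std::vector<size_t>> spreadMarks(
    std::vector<size_t> parts, size_t num_streams, size_t min_marks_per_stream, size_t min_marks_concurrent)
{
    std::vector<std::vector<size_t>> streams;
    for (size_t i = 0; i < num_streams && !parts.empty(); ++i)
    {
        size_t need_marks = min_marks_per_stream;
        std::vector<size_t> assigned;
        while (need_marks > 0 && !parts.empty())
        {
            size_t & marks_in_part = parts.back();

            // Do not take too few marks from a part, and do not leave too few in it.
            if (marks_in_part >= min_marks_concurrent && need_marks < min_marks_concurrent)
                need_marks = min_marks_concurrent;
            if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_concurrent)
                need_marks = marks_in_part;

            size_t take = std::min(marks_in_part, need_marks);
            assigned.push_back(take);
            marks_in_part -= take;
            need_marks -= take;
            if (marks_in_part == 0)
                parts.pop_back();
        }
        streams.push_back(std::move(assigned));
    }
    return streams;
}

int main()
{
    auto streams = spreadMarks({40, 10, 30}, /*num_streams=*/2, /*min_marks_per_stream=*/40, /*min_marks_concurrent=*/8);
    for (const auto & s : streams)
    {
        for (size_t m : s)
            std::cout << m << ' ';
        std::cout << '\n'; // "30 10" then "40"
    }
}
```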
+ while (need_marks > 0) + { + if (part.ranges.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among streams"); + + MarkRange & range = part.ranges.front(); + + const size_t marks_in_range = range.end - range.begin; + const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); + + ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); + range.begin += marks_to_get_from_range; + marks_in_part -= marks_to_get_from_range; + need_marks -= marks_to_get_from_range; + if (range.begin == range.end) + part.ranges.pop_front(); + } + parts_with_ranges.emplace_back(part); + } + + ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction); + new_parts.emplace_back(part.data_part, part.alter_conversions, part.part_index_in_query, std::move(ranges_to_get_from_part)); + } + + splitted_parts_and_ranges.emplace_back(std::move(new_parts)); + } + + for (auto && item : splitted_parts_and_ranges) + pipes.emplace_back(readInOrder(std::move(item), column_names, pool_settings, read_type, input_order_info->limit)); } Block pipe_header; @@ -856,7 +948,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( if (pipe.numOutputPorts() > 1) { auto transform = std::make_shared( - pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch); + pipe.getHeader(), pipe.numOutputPorts(), sort_description, block_size.max_block_size_rows, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch); pipe.addTransform(std::move(transform)); } @@ -889,7 +981,7 @@ static void addMergingFinal( const SortDescription & sort_description, MergeTreeData::MergingParams merging_params, Names partition_key_columns, - size_t max_block_size) + size_t max_block_size_rows) { const auto & header = pipe.getHeader(); size_t num_outputs = pipe.numOutputPorts(); @@ -902,31 +994,31 @@ static void addMergingFinal( { case MergeTreeData::MergingParams::Ordinary: return std::make_shared(header, num_outputs, - sort_description, max_block_size, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch); + sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch); case MergeTreeData::MergingParams::Collapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, true, max_block_size, /*max_block_size_bytes=*/0); + sort_description, merging_params.sign_column, true, max_block_size_rows, /*max_block_size_bytes=*/0); case MergeTreeData::MergingParams::Summing: return std::make_shared(header, num_outputs, - sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size, /*max_block_size_bytes=*/0); + sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size_rows, /*max_block_size_bytes=*/0); case MergeTreeData::MergingParams::Aggregating: return std::make_shared(header, num_outputs, - sort_description, max_block_size, /*max_block_size_bytes=*/0); + sort_description, max_block_size_rows, /*max_block_size_bytes=*/0); case MergeTreeData::MergingParams::Replacing: return std::make_shared(header, num_outputs, - sort_description, merging_params.is_deleted_column, merging_params.version_column, max_block_size, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty()); + sort_description, merging_params.is_deleted_column, 
merging_params.version_column, max_block_size_rows, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty()); case MergeTreeData::MergingParams::VersionedCollapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, max_block_size, /*max_block_size_bytes=*/0); + sort_description, merging_params.sign_column, max_block_size_rows, /*max_block_size_bytes=*/0); case MergeTreeData::MergingParams::Graphite: return std::make_shared(header, num_outputs, - sort_description, max_block_size, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); + sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); } UNREACHABLE(); @@ -996,7 +1088,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// MergeTreeReadPool and MergeTreeThreadSelectProcessor for parallel select. if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0) + parts_to_merge_ranges[range_index]->data_part->info.level > 0 + && data.merging_params.is_deleted_column.empty()) { sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount(); lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index])); @@ -1021,11 +1114,12 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( return this->read( std::move(parts), column_names, - ReadFromMergeTree::ReadType::InOrder, + ReadType::InOrder, 1 /* num_streams */, 0 /* min_marks_for_concurrent_read */, info.use_uncompressed_cache); }; + pipes = buildPipesForReadingByPKRanges( metadata_for_reading->getPrimaryKey(), sorting_expr, @@ -1037,7 +1131,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( else { pipes.emplace_back(read( - std::move(new_parts), column_names, ReadFromMergeTree::ReadType::InOrder, num_streams, 0, info.use_uncompressed_cache)); + std::move(new_parts), column_names, ReadType::InOrder, num_streams, 0, info.use_uncompressed_cache)); pipes.back().addSimpleTransform([sorting_expr](const Block & header) { return std::make_shared(header, sorting_expr); }); @@ -1052,7 +1146,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// with level > 0 then we won't postprocess this part if (settings.do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0) + parts_to_merge_ranges[range_index]->data_part->info.level > 0 && + data.merging_params.is_deleted_column.empty()) { partition_pipes.emplace_back(Pipe::unitePipes(std::move(pipes))); continue; @@ -1077,7 +1172,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( sort_description, data.merging_params, partition_key_columns, - max_block_size); + block_size.max_block_size_rows); partition_pipes.emplace_back(Pipe::unitePipes(std::move(pipes))); } @@ -1097,7 +1192,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read && lonely_parts.size() < num_streams_for_lonely_parts) num_streams_for_lonely_parts = std::max((sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, lonely_parts.size()); - auto pipe = 
read(std::move(lonely_parts), column_names, ReadFromMergeTree::ReadType::Default, + auto pipe = read(std::move(lonely_parts), column_names, ReadType::Default, num_streams_for_lonely_parts, min_marks_for_concurrent_read, info.use_uncompressed_cache); /// Drop temporary columns, added by 'sorting_key_expr' @@ -1182,6 +1277,7 @@ static void buildIndexes( std::optional & indexes, ActionsDAGPtr filter_actions_dag, const MergeTreeData & data, + const MergeTreeData::DataPartsVector & parts, const ContextPtr & context, const SelectQueryInfo & query_info, const StorageMetadataPtr & metadata_snapshot) @@ -1204,7 +1300,7 @@ static void buildIndexes( context, primary_key_column_names, primary_key.expression, - array_join_name_set}, {}, {}, {}, false}); + array_join_name_set}, {}, {}, {}, false, {}}); } else { @@ -1212,7 +1308,7 @@ static void buildIndexes( query_info, context, primary_key_column_names, - primary_key.expression}, {}, {}, {}, false}); + primary_key.expression}, {}, {}, {}, false, {}}); } if (metadata_snapshot->hasPartitionKey()) @@ -1225,6 +1321,9 @@ static void buildIndexes( indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } + /// TODO Support row_policy_filter and additional_filters + indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context); + indexes->use_skip_indexes = settings.use_skip_indexes; bool final = query_info.isFinal(); @@ -1302,7 +1401,7 @@ static void buildIndexes( void ReadFromMergeTree::applyFilters() { auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info); - buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading); + buildIndexes(indexes, filter_actions_dag, data, prepared_parts, context, query_info, metadata_for_reading); } MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( @@ -1380,11 +1479,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( size_t total_parts = parts.size(); - /// TODO Support row_policy_filter and additional_filters - auto part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, query_info.query, context); - if (part_values && part_values->empty()) - return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); - result.column_names_to_read = real_column_names; /// If there are only virtual columns in the query, you must request at least one non-virtual one. 
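part_values now lives in the Indexes struct: an unset optional appears to mean no virtual-column filtering, while an empty set makes selectRangesToReadImpl return an empty analysis result early. A hedged sketch of that three-way distinction over plain part names (filterParts below is illustrative, not the real filterPartsByVirtualColumns):

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

using PartNames = std::vector<std::string>;
using PartValueSet = std::optional<std::unordered_set<std::string>>;

// nullopt   -> the filter did not constrain _part / _partition_id, keep everything.
// empty set -> no part can match, so the caller short-circuits with an empty result.
PartNames filterParts(const PartNames & parts, const PartValueSet & part_values)
{
    if (!part_values)
        return parts;

    PartNames selected;
    for (const auto & name : parts)
        if (part_values->count(name))
            selected.push_back(name);
    return selected;
}

int main()
{
    PartNames parts{"all_1_1_0", "all_2_2_0"};
    std::cout << filterParts(parts, std::nullopt).size() << '\n';                                              // 2
    std::cout << filterParts(parts, PartValueSet{std::unordered_set<std::string>{"all_2_2_0"}}).size() << '\n'; // 1
    std::cout << filterParts(parts, PartValueSet{std::unordered_set<std::string>{}}).size() << '\n';            // 0
}
```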
@@ -1399,7 +1493,10 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( const Names & primary_key_column_names = primary_key.column_names; if (!indexes) - buildIndexes(indexes, query_info.filter_actions_dag, data, context, query_info, metadata_snapshot); + buildIndexes(indexes, query_info.filter_actions_dag, data, parts, context, query_info, metadata_snapshot); + + if (indexes->part_values && indexes->part_values->empty()) + return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) { @@ -1423,7 +1520,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( indexes->minmax_idx_condition, parts, alter_conversions, - part_values, + indexes->part_values, metadata_snapshot_base, data, context, @@ -1559,11 +1656,19 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info { query_info.prewhere_info = prewhere_info_value; prewhere_info = prewhere_info_value; - output_stream = DataStream{.header = IMergeTreeSelectAlgorithm::transformHeader( + + output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader( storage_snapshot->getSampleBlockForColumns(real_column_names), prewhere_info_value, data.getPartitionValueType(), virt_column_names)}; + + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo(), + prewhere_info); } bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 3e3edd4dc5c..9b288196746 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -78,34 +78,16 @@ public: struct IndexStat { IndexType type; - std::string name; - std::string description; - std::string condition; - std::vector used_keys; + std::string name = {}; + std::string description = {}; + std::string condition = {}; + std::vector used_keys = {}; size_t num_parts_after; size_t num_granules_after; }; using IndexStats = std::vector; - - enum class ReadType - { - /// By default, read will use MergeTreeReadPool and return pipe with num_streams outputs. - /// If num_streams == 1, will read without pool, in order specified in parts. - Default, - /// Read in sorting key order. - /// Returned pipe will have the number of ports equals to parts.size(). - /// Parameter num_streams_ is ignored in this case. - /// User should add MergingSorted itself if needed. - InOrder, - /// The same as InOrder, but in reverse order. - /// For every part, read ranges and granules from end to begin. Also add ReverseTransform. - InReverseOrder, - /// A special type of reading where every replica - /// talks to a remote coordinator (which is located on the initiator node) - /// and who spreads marks and parts across them. 
- ParallelReplicas, - }; + using ReadType = MergeTreeReadType; struct AnalysisResult { @@ -113,7 +95,7 @@ public: MergeTreeDataSelectSamplingData sampling; IndexStats index_stats; Names column_names_to_read; - ReadFromMergeTree::ReadType read_type = ReadFromMergeTree::ReadType::Default; + ReadType read_type = ReadType::Default; UInt64 total_parts = 0; UInt64 parts_before_pk = 0; UInt64 selected_parts = 0; @@ -171,6 +153,7 @@ public: std::optional minmax_idx_condition; UsefulSkipIndexes skip_indexes; bool use_skip_indexes; + std::optional> part_values; }; static MergeTreeDataSelectAnalysisResultPtr selectRangesToRead( @@ -222,7 +205,7 @@ public: const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } const MergeTreeData & getMergeTreeData() const { return data; } - size_t getMaxBlockSize() const { return max_block_size; } + size_t getMaxBlockSize() const { return block_size.max_block_size_rows; } size_t getNumStreams() const { return requested_num_streams; } bool isParallelReadingEnabled() const { return read_task_callback != std::nullopt; } @@ -270,12 +253,10 @@ private: StorageMetadataPtr metadata_for_reading; ContextPtr context; + const MergeTreeReadTask::BlockSizeParams block_size; - const size_t max_block_size; size_t requested_num_streams; size_t output_streams_limit = 0; - const size_t preferred_block_size_bytes; - const size_t preferred_max_column_in_block_size_bytes; const bool sample_factor_column_queried; /// Used for aggregation optimisation (see DB::QueryPlanOptimizations::tryAggregateEachPartitionIndependently). @@ -291,16 +272,14 @@ private: UInt64 selected_rows = 0; UInt64 selected_marks = 0; + using PoolSettings = MergeTreeReadPoolBase::PoolSettings; + Pipe read(RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache); - Pipe readFromPool(RangesInDataParts parts_with_ranges, Names required_columns, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache); - Pipe readFromPoolParallelReplicas(RangesInDataParts parts_with_ranges, Names required_columns, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache); - Pipe readInOrder(RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, bool use_uncompressed_cache, UInt64 limit, MergeTreeInOrderReadPoolParallelReplicasPtr pool); + Pipe readFromPool(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings); + Pipe readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings); + Pipe readInOrder(RangesInDataParts parts_with_ranges, Names required_columns, PoolSettings pool_settings, ReadType read_type, UInt64 limit); - template - ProcessorPtr createSource(const RangesInDataPart & part, const Names & required_columns, bool use_uncompressed_cache, bool has_limit_below_one_block, MergeTreeInOrderReadPoolParallelReplicasPtr pool); - - Pipe spreadMarkRanges( - RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection); + Pipe spreadMarkRanges(RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection); Pipe groupStreamsByPartition(AnalysisResult & result, ActionsDAGPtr & result_projection); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 
5cc13f45df4..f389e5a9e1e 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -103,7 +103,8 @@ ReadFromRemote::ReadFromRemote( Tables external_tables_, Poco::Logger * log_, UInt32 shard_count_, - std::shared_ptr storage_limits_) + std::shared_ptr storage_limits_, + const String & cluster_name_) : ISourceStep(DataStream{.header = std::move(header_)}) , shards(std::move(shards_)) , stage(stage_) @@ -116,6 +117,7 @@ ReadFromRemote::ReadFromRemote( , storage_limits(std::move(storage_limits_)) , log(log_) , shard_count(shard_count_) + , cluster_name(cluster_name_) { } @@ -162,7 +164,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (my_table_func_ptr) try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, ¤t_settings, PoolMode::GET_MANY); else - try_results = my_shard.shard_info.pool->getManyChecked(timeouts, ¤t_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName()); + try_results = my_shard.shard_info.pool->getManyChecked( + timeouts, ¤t_settings, PoolMode::GET_MANY, + my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName()); } catch (const Exception & ex) { @@ -183,7 +187,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (try_results.empty() || local_delay < max_remote_delay) { auto plan = createLocalPlan( - query, header, my_context, my_stage, my_shard.shard_info.shard_num, my_shard_count, 0, 0, /*coordinator=*/nullptr); + query, header, my_context, my_stage, my_shard.shard_info.shard_num, my_shard_count); return std::move(*plan->buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(my_context), @@ -232,16 +236,42 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared(), "_shard_num"}}; + if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS) + { + if (context->getSettingsRef().cluster_for_parallel_replicas.changed) + { + const String cluster_for_parallel_replicas = context->getSettingsRef().cluster_for_parallel_replicas; + if (cluster_for_parallel_replicas != cluster_name) + LOG_INFO(log, "cluster_for_parallel_replicas has been set for the query but has no effect: {}. Distributed table cluster is used: {}", + cluster_for_parallel_replicas, cluster_name); + } + + LOG_TRACE(&Poco::Logger::get("ReadFromRemote"), "Setting `cluster_for_parallel_replicas` to {}", cluster_name); + context->setSetting("cluster_for_parallel_replicas", cluster_name); + } + std::shared_ptr remote_query_executor; remote_query_executor = std::make_shared( shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); remote_query_executor->setLogger(log); - remote_query_executor->setPoolMode(PoolMode::GET_MANY); + + if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS) + { + // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: + // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. + // The coordinator will return query result from the shard. + // Only one coordinator per shard is necessary. Therefore using PoolMode::GET_ONE to establish only one connection per shard. 
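The comment above reduces to one decision: in READ_TASKS mode each shard gets exactly one connection, because the contacted replica spawns the coordinator and returns the whole shard result, so extra connections would only multiply rows. A minimal enum-level sketch (the enums are local stand-ins, with OTHER as a placeholder for the remaining parallel-replicas modes):

```cpp
#include <iostream>

enum class ParallelReplicasMode { READ_TASKS, OTHER /* placeholder for the remaining modes */ };
enum class PoolMode { GET_ONE, GET_MANY };

// One connection per shard in READ_TASKS mode: the contacted replica instantiates
// the coordinator and returns the whole shard result, so more connections would
// only create more coordinators and duplicate the result.
PoolMode choosePoolMode(ParallelReplicasMode mode)
{
    return mode == ParallelReplicasMode::READ_TASKS ? PoolMode::GET_ONE : PoolMode::GET_MANY;
}

int main()
{
    std::cout << (choosePoolMode(ParallelReplicasMode::READ_TASKS) == PoolMode::GET_ONE) << '\n'; // 1
    std::cout << (choosePoolMode(ParallelReplicasMode::OTHER) == PoolMode::GET_MANY) << '\n';     // 1
}
```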
+ // Using PoolMode::GET_MANY for this mode will(can) lead to instantiation of several coordinators (depends on max_parallel_replicas setting) + // each will execute parallel reading from replicas, so the query result will be multiplied by the number of created coordinators + remote_query_executor->setPoolMode(PoolMode::GET_ONE); + } + else + remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - remote_query_executor->setMainTable(main_table); + remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table); pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); addConvertingActions(pipes.back(), output_stream->header); @@ -275,7 +305,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( Block header_, QueryProcessingStage::Enum stage_, StorageID main_table_, - ASTPtr table_func_ptr_, ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, @@ -288,7 +317,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( , coordinator(std::move(coordinator_)) , stage(std::move(stage_)) , main_table(std::move(main_table_)) - , table_func_ptr(table_func_ptr_) , context(context_) , throttler(throttler_) , scalars(scalars_) diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index d4005d81f1b..a2486e1eaa1 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr; class ReadFromRemote final : public ISourceStep { public: + /// @param main_table_ if Shards contains main_table then this parameter will be ignored ReadFromRemote( ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, @@ -34,7 +35,8 @@ public: Tables external_tables_, Poco::Logger * log_, UInt32 shard_count_, - std::shared_ptr storage_limits_); + std::shared_ptr storage_limits_, + const String & cluster_name_); String getName() const override { return "ReadFromRemote"; } @@ -54,8 +56,9 @@ private: Tables external_tables; std::shared_ptr storage_limits; Poco::Logger * log; - UInt32 shard_count; + const String cluster_name; + void addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); void addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); }; @@ -71,7 +74,6 @@ public: Block header_, QueryProcessingStage::Enum stage_, StorageID main_table_, - ASTPtr table_func_ptr_, ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, @@ -95,7 +97,6 @@ private: ParallelReplicasReadingCoordinatorPtr coordinator; QueryProcessingStage::Enum stage; StorageID main_table; - ASTPtr table_func_ptr; ContextMutablePtr context; ThrottlerPtr throttler; Scalars scalars; diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 371a24ac6f2..a72cab05754 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -27,6 +27,8 @@ public: size_t max_bytes_before_external_sort = 0; TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index 8167fae9baf..57b878f7d39 100644 --- a/src/Processors/ResizeProcessor.cpp +++ 
b/src/Processors/ResizeProcessor.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -138,11 +137,11 @@ ResizeProcessor::Status ResizeProcessor::prepare() while (!is_end_input() && !is_end_output()) { auto output = get_next_out(); - auto input = get_next_input(); if (output == outputs.end()) return get_status_if_no_outputs(); + auto input = get_next_input(); if (input == inputs.end()) return get_status_if_no_inputs(); @@ -164,10 +163,7 @@ IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, initialized = true; for (auto & input : inputs) - { - input.setNeeded(); input_ports.push_back({.port = &input, .status = InputStatus::NotActive}); - } for (auto & output : outputs) output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); @@ -197,6 +193,13 @@ IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, } } + if (!is_reading_started && !waiting_outputs.empty()) + { + for (auto & input : inputs) + input.setNeeded(); + is_reading_started = true; + } + if (num_finished_outputs == outputs.size()) { for (auto & input : inputs) diff --git a/src/Processors/ResizeProcessor.h b/src/Processors/ResizeProcessor.h index 766c39172a2..61e35c54364 100644 --- a/src/Processors/ResizeProcessor.h +++ b/src/Processors/ResizeProcessor.h @@ -43,6 +43,7 @@ private: std::queue waiting_outputs; std::queue inputs_with_data; bool initialized = false; + bool is_reading_started = false; enum class OutputStatus { diff --git a/src/Processors/Sources/ConstChunkGenerator.h b/src/Processors/Sources/ConstChunkGenerator.h new file mode 100644 index 00000000000..a0e35ebbee5 --- /dev/null +++ b/src/Processors/Sources/ConstChunkGenerator.h @@ -0,0 +1,38 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Source that generates chunks with constant columns and +/// size up to max_block_size with total rows total_num_rows. +class ConstChunkGenerator : public ISource +{ +public: + ConstChunkGenerator(Block header, size_t total_num_rows, size_t max_block_size_) + : ISource(std::move(header)) + , remaining_rows(total_num_rows), max_block_size(max_block_size_) + { + } + + String getName() const override { return "ConstChunkGenerator"; } + +protected: + Chunk generate() override + { + if (!remaining_rows) + return {}; + + size_t num_rows = std::min(max_block_size, remaining_rows); + remaining_rows -= num_rows; + return cloneConstWithDefault(Chunk{getPort().getHeader().getColumns(), 0}, num_rows); + } + +private: + size_t remaining_rows; + size_t max_block_size; +}; + +} diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index ee7fd757949..f7928f89015 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -133,6 +133,12 @@ void DelayedSource::work() processors = Pipe::detachProcessors(std::move(pipe)); + if (rows_before_limit) + { + for (auto & processor : processors) + processor->setRowsBeforeLimitCounter(rows_before_limit); + } + synchronizePorts(totals_output, totals, header, processors); synchronizePorts(extremes_output, extremes, header, processors); } @@ -148,7 +154,9 @@ Processors DelayedSource::expandPipeline() inputs.emplace_back(outputs.front().getHeader(), this); /// Connect checks that header is same for ports. connect(*output, inputs.back()); - inputs.back().setNeeded(); + + if (output == main_output) + inputs.back().setNeeded(); } /// Executor will check that all processors are connected. 
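The new ConstChunkGenerator above slices total_num_rows into batches of at most max_block_size rows and stops when the budget is exhausted. The batching is easy to model outside the Chunk/ISource machinery; a standalone sketch of the sequence of chunk sizes it would emit:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Produces the chunk sizes ConstChunkGenerator would generate: full
// max_block_size batches followed by one final remainder batch.
std::vector<size_t> chunkSizes(size_t total_num_rows, size_t max_block_size)
{
    std::vector<size_t> sizes;
    size_t remaining_rows = total_num_rows;
    while (remaining_rows)
    {
        size_t num_rows = std::min(max_block_size, remaining_rows);
        remaining_rows -= num_rows;
        sizes.push_back(num_rows);
    }
    return sizes;
}

int main()
{
    for (size_t n : chunkSizes(/*total_num_rows=*/10, /*max_block_size=*/4))
        std::cout << n << ' '; // 4 4 2
    std::cout << '\n';
}
```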
diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h index f069bde455f..0b2751e18a6 100644 --- a/src/Processors/Sources/DelayedSource.h +++ b/src/Processors/Sources/DelayedSource.h @@ -30,10 +30,13 @@ public: OutputPort * getTotalsPort() { return totals; } OutputPort * getExtremesPort() { return extremes; } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } + private: QueryPlanResourceHolder resources; Creator creator; Processors processors; + RowsBeforeLimitCounterPtr rows_before_limit; /// Outputs for DelayedSource. OutputPort * main = nullptr; diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 115e24d5740..a4e81a081a4 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -59,7 +59,6 @@ PostgreSQLSource::PostgreSQLSource( init(sample_block); } - template void PostgreSQLSource::init(const Block & sample_block) { @@ -82,7 +81,8 @@ void PostgreSQLSource::onStart() { try { - tx = std::make_shared(connection_holder->get()); + auto & conn = connection_holder->get(); + tx = std::make_shared(conn); } catch (const pqxx::broken_connection &) { @@ -180,6 +180,27 @@ void PostgreSQLSource::onFinish() if (tx && auto_commit) tx->commit(); + + is_completed = true; +} + +template +PostgreSQLSource::~PostgreSQLSource() +{ + if (!is_completed) + { + try + { + stream.reset(); + tx.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + connection_holder->setBroken(); + } } template diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 312e9f5fb18..8a648ae8bb5 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -28,6 +28,8 @@ public: String getName() const override { return "PostgreSQL"; } + ~PostgreSQLSource() override; + protected: PostgreSQLSource( std::shared_ptr tx_, @@ -54,6 +56,7 @@ private: ExternalResultDescription description; bool started = false; + bool is_completed = false; postgres::ConnectionHolderPtr connection_holder; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 310a1d33e28..74ab3649068 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -77,6 +77,8 @@ std::optional RemoteSource::tryGenerate() { if (value.total_rows_to_read) addTotalRowsApprox(value.total_rows_to_read); + if (value.total_bytes_to_read) + addTotalBytes(value.total_bytes_to_read); progress(value.read_rows, value.read_bytes); }); diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index ace175f251c..2625a7cdabb 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include namespace DB @@ -21,10 +23,10 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; extern const int TIMEOUT_EXCEEDED; - extern const int CANNOT_FCNTL; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_POLL; extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; + extern const int CANNOT_FCNTL; + extern const int CANNOT_POLL; } static bool tryMakeFdNonBlocking(int fd) @@ -64,19 +66,14 @@ static void makeFdBlocking(int fd) throwFromErrno("Cannot set blocking mode 
of pipe", ErrorCodes::CANNOT_FCNTL); } -static bool pollFd(int fd, size_t timeout_milliseconds, int events) +static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds) { - pollfd pfd; - pfd.fd = fd; - pfd.events = events; - pfd.revents = 0; - int res; while (true) { Stopwatch watch; - res = poll(&pfd, 1, static_cast(timeout_milliseconds)); + res = poll(pfds, static_cast(num), static_cast(timeout_milliseconds)); if (res < 0) { @@ -94,17 +91,44 @@ static bool pollFd(int fd, size_t timeout_milliseconds, int events) } } - return res > 0; + return res; +} + +static bool pollFd(int fd, size_t timeout_milliseconds, int events) +{ + pollfd pfd; + pfd.fd = fd; + pfd.events = events; + pfd.revents = 0; + + return pollWithTimeout(&pfd, 1, timeout_milliseconds) > 0; } class TimeoutReadBufferFromFileDescriptor : public BufferWithOwnMemory { public: - explicit TimeoutReadBufferFromFileDescriptor(int fd_, size_t timeout_milliseconds_) - : fd(fd_) + explicit TimeoutReadBufferFromFileDescriptor( + int stdout_fd_, + int stderr_fd_, + size_t timeout_milliseconds_, + ExternalCommandStderrReaction stderr_reaction_) + : stdout_fd(stdout_fd_) + , stderr_fd(stderr_fd_) , timeout_milliseconds(timeout_milliseconds_) + , stderr_reaction(stderr_reaction_) { - makeFdNonBlocking(fd); + makeFdNonBlocking(stdout_fd); + makeFdNonBlocking(stderr_fd); + + pfds[0].fd = stdout_fd; + pfds[0].events = POLLIN; + pfds[1].fd = stderr_fd; + pfds[1].events = POLLIN; + + if (stderr_reaction == ExternalCommandStderrReaction::NONE) + num_pfds = 1; + else + num_pfds = 2; } bool nextImpl() override @@ -113,19 +137,54 @@ public: while (!bytes_read) { - if (!pollFd(fd, timeout_milliseconds, POLLIN)) + pfds[0].revents = 0; + pfds[1].revents = 0; + size_t num_events = pollWithTimeout(pfds, num_pfds, timeout_milliseconds); + if (0 == num_events) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Pipe read timeout exceeded {} milliseconds", timeout_milliseconds); - ssize_t res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); + bool has_stdout = pfds[0].revents > 0; + bool has_stderr = pfds[1].revents > 0; - if (-1 == res && errno != EINTR) - throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + if (has_stderr) + { + if (stderr_read_buf == nullptr) + stderr_read_buf.reset(new char[BUFFER_SIZE]); + ssize_t res = ::read(stderr_fd, stderr_read_buf.get(), BUFFER_SIZE); + if (res > 0) + { + std::string_view str(stderr_read_buf.get(), res); + if (stderr_reaction == ExternalCommandStderrReaction::THROW) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable generates stderr: {}", str); + else if (stderr_reaction == ExternalCommandStderrReaction::LOG) + LOG_WARNING( + &::Poco::Logger::get("TimeoutReadBufferFromFileDescriptor"), "Executable generates stderr: {}", str); + else if (stderr_reaction == ExternalCommandStderrReaction::LOG_FIRST) + { + res = std::min(ssize_t(stderr_result_buf.reserve()), res); + if (res > 0) + stderr_result_buf.insert(stderr_result_buf.end(), str.begin(), str.begin() + res); + } + else if (stderr_reaction == ExternalCommandStderrReaction::LOG_LAST) + { + stderr_result_buf.insert(stderr_result_buf.end(), str.begin(), str.begin() + res); + } + } + } - if (res == 0) - break; + if (has_stdout) + { + ssize_t res = ::read(stdout_fd, internal_buffer.begin(), internal_buffer.size()); - if (res > 0) - bytes_read += res; + if (-1 == res && errno != EINTR) + throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + + if (res == 0) + 
break; + + if (res > 0) + bytes_read += res; + } } if (bytes_read > 0) @@ -143,25 +202,46 @@ public: void reset() const { - makeFdBlocking(fd); + makeFdBlocking(stdout_fd); + makeFdBlocking(stderr_fd); } ~TimeoutReadBufferFromFileDescriptor() override { - tryMakeFdBlocking(fd); + tryMakeFdBlocking(stdout_fd); + tryMakeFdBlocking(stderr_fd); + + if (!stderr_result_buf.empty()) + { + String stderr_result; + stderr_result.reserve(stderr_result_buf.size()); + stderr_result.append(stderr_result_buf.begin(), stderr_result_buf.end()); + LOG_WARNING( + &::Poco::Logger::get("ShellCommandSource"), + "Executable generates stderr at the {}: {}", + stderr_reaction == ExternalCommandStderrReaction::LOG_FIRST ? "beginning" : "end", + stderr_result); + } } private: - int fd; + int stdout_fd; + int stderr_fd; size_t timeout_milliseconds; + ExternalCommandStderrReaction stderr_reaction; + + static constexpr size_t BUFFER_SIZE = 4_KiB; + pollfd pfds[2]; + size_t num_pfds; + std::unique_ptr stderr_read_buf; + boost::circular_buffer_space_optimized stderr_result_buf{BUFFER_SIZE}; }; class TimeoutWriteBufferFromFileDescriptor : public BufferWithOwnMemory { public: explicit TimeoutWriteBufferFromFileDescriptor(int fd_, size_t timeout_milliseconds_) - : fd(fd_) - , timeout_milliseconds(timeout_milliseconds_) + : fd(fd_), timeout_milliseconds(timeout_milliseconds_) { makeFdNonBlocking(fd); } @@ -248,6 +328,8 @@ namespace ContextPtr context_, const std::string & format_, size_t command_read_timeout_milliseconds, + ExternalCommandStderrReaction stderr_reaction, + bool check_exit_code_, const Block & sample_block_, std::unique_ptr && command_, std::vector && send_data_tasks = {}, @@ -260,13 +342,14 @@ namespace , sample_block(sample_block_) , command(std::move(command_)) , configuration(configuration_) - , timeout_command_out(command->out.getFD(), command_read_timeout_milliseconds) + , timeout_command_out(command->out.getFD(), command->err.getFD(), command_read_timeout_milliseconds, stderr_reaction) , command_holder(std::move(command_holder_)) , process_pool(process_pool_) + , check_exit_code(check_exit_code_) { for (auto && send_data_task : send_data_tasks) { - send_data_threads.emplace_back([task = std::move(send_data_task), this]() + send_data_threads.emplace_back([task = std::move(send_data_task), this]() mutable { try { @@ -276,6 +359,10 @@ namespace { std::lock_guard lock(send_data_lock); exception_during_send_data = std::current_exception(); + + /// task should be reset inside catch block or else it breaks d'tor + /// invariants such as in ~WriteBuffer. + task = {}; } }); } @@ -352,11 +439,7 @@ namespace } if (!executor->pull(chunk)) - { - if (configuration.check_exit_code) - command->wait(); return {}; - } current_read_rows += chunk.getNumRows(); } @@ -379,6 +462,21 @@ namespace if (thread.joinable()) thread.join(); + if (check_exit_code) + { + if (process_pool) + { + bool valid_command + = configuration.read_fixed_number_of_rows && current_read_rows >= configuration.number_of_rows_to_read; + + // We can only wait for pooled commands when they are invalid. 
+ if (!valid_command) + command->wait(); + } + else + command->wait(); + } + rethrowExceptionDuringSendDataIfNeeded(); } @@ -413,6 +511,8 @@ namespace ShellCommandHolderPtr command_holder; std::shared_ptr process_pool; + bool check_exit_code = false; + QueryPipeline pipeline; std::unique_ptr executor; @@ -534,7 +634,8 @@ Pipe ShellCommandSourceCoordinator::createPipe( } int write_buffer_fd = write_buffer->getFD(); - auto timeout_write_buffer = std::make_shared(write_buffer_fd, configuration.command_write_timeout_milliseconds); + auto timeout_write_buffer + = std::make_shared(write_buffer_fd, configuration.command_write_timeout_milliseconds); input_pipes[i].resize(1); @@ -570,6 +671,8 @@ Pipe ShellCommandSourceCoordinator::createPipe( context, configuration.format, configuration.command_read_timeout_milliseconds, + configuration.stderr_reaction, + configuration.check_exit_code, std::move(sample_block), std::move(process), std::move(tasks), diff --git a/src/Processors/Sources/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h index 6dc6781cc4c..44bd725bbe2 100644 --- a/src/Processors/Sources/ShellCommandSource.h +++ b/src/Processors/Sources/ShellCommandSource.h @@ -4,6 +4,7 @@ #include +#include #include #include @@ -33,9 +34,6 @@ struct ShellCommandSourceConfiguration size_t number_of_rows_to_read = 0; /// Max block size size_t max_block_size = DEFAULT_BLOCK_SIZE; - /// Will throw if the command exited with - /// non-zero status code - size_t check_exit_code = false; }; class ShellCommandSourceCoordinator @@ -57,6 +55,15 @@ public: /// Timeout for writing data to command stdin size_t command_write_timeout_milliseconds = 10000; + /// Reaction when external command outputs data to its stderr. + ExternalCommandStderrReaction stderr_reaction = ExternalCommandStderrReaction::NONE; + + /// Will throw if the command exited with + /// non-zero status code. + /// NOTE: If executable pool is used, we cannot check exit code, + /// so this setting has no effect. 
+ size_t check_exit_code = false; + /// Pool size valid only if executable_pool = true size_t pool_size = 16; diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 3d5a910f054..4e9f7b7601a 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -159,14 +159,14 @@ void AggregatingInOrderTransform::consume(Chunk chunk) if (group_by_key) params->aggregator.mergeOnBlockSmall(variants, key_begin, key_end, aggregate_columns_data, key_columns_raw); else - params->aggregator.mergeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_columns_data); + params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data); } else { if (group_by_key) params->aggregator.executeOnBlockSmall(variants, key_begin, key_end, key_columns_raw, aggregate_function_instructions.data()); else - params->aggregator.executeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_function_instructions.data()); + params->aggregator.executeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_function_instructions.data()); } } diff --git a/src/Processors/Transforms/AggregatingPartialResultTransform.cpp b/src/Processors/Transforms/AggregatingPartialResultTransform.cpp new file mode 100644 index 00000000000..cf8ce72e096 --- /dev/null +++ b/src/Processors/Transforms/AggregatingPartialResultTransform.cpp @@ -0,0 +1,47 @@ +#include + +namespace DB +{ + +AggregatingPartialResultTransform::AggregatingPartialResultTransform( + const Block & input_header, const Block & output_header, AggregatingTransformPtr aggregating_transform_, + UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) + : PartialResultTransform(input_header, output_header, partial_result_limit_, partial_result_duration_ms_) + , aggregating_transform(std::move(aggregating_transform_)) + , transform_aggregator(input_header, aggregating_transform->params->params) + {} + +void AggregatingPartialResultTransform::transformPartialResult(Chunk & chunk) +{ + auto & params = aggregating_transform->params->params; + + bool no_more_keys = false; + AggregatedDataVariants variants; + ColumnRawPtrs key_columns(params.keys_size); + Aggregator::AggregateColumns aggregate_columns(params.aggregates_size); + + const UInt64 num_rows = chunk.getNumRows(); + transform_aggregator.executeOnBlock(chunk.detachColumns(), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); + + auto transformed_block = transform_aggregator.convertToBlocks(variants, /*final*/ true, /*max_threads*/ 1).front(); + + chunk = convertToChunk(transformed_block); +} + +PartialResultTransform::ShaphotResult AggregatingPartialResultTransform::getRealProcessorSnapshot() +{ + std::lock_guard lock(aggregating_transform->snapshot_mutex); + if (aggregating_transform->is_generate_initialized) + return {{}, SnaphotStatus::Stopped}; + + if (aggregating_transform->variants.empty()) + return {{}, SnaphotStatus::NotReady}; + + auto & snapshot_aggregator = aggregating_transform->params->aggregator; + auto & snapshot_variants = aggregating_transform->many_data->variants; + auto block = snapshot_aggregator.prepareBlockAndFillWithoutKeySnapshot(*snapshot_variants.at(0)); + + return {convertToChunk(block), SnaphotStatus::Ready}; +} + +} diff --git a/src/Processors/Transforms/AggregatingPartialResultTransform.h b/src/Processors/Transforms/AggregatingPartialResultTransform.h new file mode 
100644 index 00000000000..f7bac3a5394 --- /dev/null +++ b/src/Processors/Transforms/AggregatingPartialResultTransform.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class AggregatingPartialResultTransform : public PartialResultTransform +{ +public: + using AggregatingTransformPtr = std::shared_ptr; + + AggregatingPartialResultTransform( + const Block & input_header, const Block & output_header, AggregatingTransformPtr aggregating_transform_, + UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + + String getName() const override { return "AggregatingPartialResultTransform"; } + + void transformPartialResult(Chunk & chunk) override; + ShaphotResult getRealProcessorSnapshot() override; + +private: + AggregatingTransformPtr aggregating_transform; + Aggregator transform_aggregator; +}; + +} diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 4bd000797a6..b4d2785bed2 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -657,6 +658,8 @@ void AggregatingTransform::consume(Chunk chunk) src_rows += num_rows; src_bytes += chunk.bytes(); + std::lock_guard lock(snapshot_mutex); + if (params->params.only_merge) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -676,6 +679,7 @@ void AggregatingTransform::initGenerate() if (is_generate_initialized) return; + std::lock_guard lock(snapshot_mutex); is_generate_initialized = true; /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation. @@ -806,4 +810,12 @@ void AggregatingTransform::initGenerate() } } +ProcessorPtr AggregatingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +{ + const auto & input_header = inputs.front().getHeader(); + const auto & output_header = outputs.front().getHeader(); + auto aggregating_processor = std::dynamic_pointer_cast(current_processor); + return std::make_shared(input_header, output_header, std::move(aggregating_processor), partial_result_limit, partial_result_duration_ms); +} + } diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 38baa4d0394..791cd12326f 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -170,9 +170,23 @@ public: void work() override; Processors expandPipeline() override; + PartialResultStatus getPartialResultProcessorSupportStatus() const override + { + /// Currently AggregatingPartialResultTransform supports only single-threaded aggregation without keys. + + /// TODO: check that aggregator.prepareBlockAndFillWithoutKey returns results without + /// changing the aggregator state once aggregation with keys is supported in AggregatingPartialResultTransform. + bool is_partial_result_supported = params->params.keys_size == 0 /// Aggregation without key. + && many_data->variants.size() == 1; /// Use only one stream for aggregation. + + return is_partial_result_supported ? 
PartialResultStatus::FullSupported : PartialResultStatus::NotSupported; + } + protected: void consume(Chunk chunk); + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + private: /// To read the data that was flushed into the temporary data file. Processors processors; @@ -212,6 +226,13 @@ private: bool is_consume_started = false; + friend class AggregatingPartialResultTransform; + /// The mutex protects variables that are used for creating a snapshot of the current processor. + /// The current implementation of AggregatingPartialResultTransform uses the 'is_generate_initialized' variable to check + /// whether the processor has started sending data through the main pipeline, and the corresponding partial result processor should stop creating snapshots. + /// Additionally, the mutex protects the 'params->aggregator' and 'many_data->variants' variables, which are used to get data from them for a snapshot. + std::mutex snapshot_mutex; + void initGenerate(); }; diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 646256d60c0..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp index 59c4b9a6a87..83a75318d61 100644 --- a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp +++ b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector & ind Columns columns; const Columns & all_cols = chunk.getColumns(); for (const auto & index : indices) - columns.push_back(all_cols.at(index)); + { + auto col = recursiveRemoveSparse(all_cols.at(index)); + columns.push_back(std::move(col)); + } + return columns; } @@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare() LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered", Poco::toLower(getDescription()), fmt::join(column_names, ", "), stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows, - 100 - 100.0 * stat.result_rows / stat.consumed_rows); + stat.consumed_rows > 0 ? 
(100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0); } else { diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp index 28a3260d742..04741a6f231 100644 --- a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp @@ -5,6 +5,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int SET_SIZE_LIMIT_EXCEEDED; } @@ -126,9 +127,20 @@ bool DistinctSortedChunkTransform::isKey(const size_t key_pos, const size_t row_ bool DistinctSortedChunkTransform::isLatestKeyFromPrevChunk(const size_t row_pos) const { - for (size_t i = 0; i < sorted_columns.size(); ++i) + for (size_t i = 0, s = sorted_columns.size(); i < s; ++i) { - const int res = prev_chunk_latest_key[i]->compareAt(0, row_pos, *sorted_columns[i], sorted_columns_descr[i].nulls_direction); + const auto & sorted_column = *sorted_columns[i]; + /// temporary hardening due to suspicious crashes in sqlancer tests + if (unlikely(sorted_column.size() <= row_pos)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected size of a sorted column: size {}, row_pos {}, column position {}, type {}", + sorted_column.size(), + row_pos, + i, + sorted_column.getFamilyName()); + + const int res = prev_chunk_latest_key[i]->compareAt(0, row_pos, sorted_column, sorted_columns_descr[i].nulls_direction); if (res != 0) return false; } @@ -193,6 +205,8 @@ void DistinctSortedChunkTransform::transform(Chunk & chunk) if (unlikely(0 == chunk_rows)) return; + convertToFullIfSparse(chunk); + Columns input_columns = chunk.detachColumns(); /// split input columns into sorted and other("non-sorted") columns initChunkProcessing(input_columns); diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index 3c40c078225..b50f66b0240 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace DB { diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 0d3341b000c..78dace56e4e 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -25,6 +25,12 @@ void ExpressionTransform::transform(Chunk & chunk) chunk.setColumns(block.getColumns(), num_rows); } +ProcessorPtr ExpressionTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) +{ + const auto & header = getInputPort().getHeader(); + return std::make_shared(header, expression); +} + ConvertingTransform::ConvertingTransform(const Block & header_, ExpressionActionsPtr expression_) : ExceptionKeepingTransform(header_, ExpressionTransform::transformHeader(header_, expression_->getActionsDAG())) , expression(std::move(expression_)) diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 791c7d7ba73..8250f25f0f8 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -26,10 +26,15 @@ public: static Block transformHeader(Block header, const ActionsDAG & expression); + PartialResultStatus getPartialResultProcessorSupportStatus() const override { return PartialResultStatus::FullSupported; } + protected: void 
transform(Chunk & chunk) override; + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + private: + ExpressionActionsPtr expression; }; diff --git a/src/Processors/Transforms/ExtractColumnsTransform.cpp b/src/Processors/Transforms/ExtractColumnsTransform.cpp new file mode 100644 index 00000000000..44bf5582290 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB +{ + +ExtractColumnsTransform::ExtractColumnsTransform(const Block & header_, const NamesAndTypesList & requested_columns_) + : ISimpleTransform(header_, transformHeader(header_, requested_columns_), false), requested_columns(requested_columns_) +{ + +} + +Block ExtractColumnsTransform::transformHeader(Block header, const NamesAndTypesList & requested_columns_) +{ + ColumnsWithTypeAndName columns; + columns.reserve(requested_columns_.size()); + for (const auto & required_column : requested_columns_) + columns.emplace_back(getColumnFromBlock(header, required_column), required_column.type, required_column.name); + + return Block(std::move(columns)); +} + +void ExtractColumnsTransform::transform(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + Columns columns; + columns.reserve(requested_columns.size()); + for (const auto & required_column : requested_columns) + columns.emplace_back(getColumnFromBlock(block, required_column)); + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/src/Processors/Transforms/ExtractColumnsTransform.h b/src/Processors/Transforms/ExtractColumnsTransform.h new file mode 100644 index 00000000000..f8b3d803736 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace DB +{ + +/// Extracts required columns and subcolumns from the block. 
+class ExtractColumnsTransform final : public ISimpleTransform +{ +public: + ExtractColumnsTransform( + const Block & header_, + const NamesAndTypesList & requested_columns_); + + String getName() const override { return "ExtractColumnsTransform"; } + + static Block transformHeader(Block header, const NamesAndTypesList & requested_columns_); + +protected: + void transform(Chunk & chunk) override; + +private: + const NamesAndTypesList requested_columns; +}; + +} diff --git a/src/Processors/Transforms/FilterSortedStreamByRange.h b/src/Processors/Transforms/FilterSortedStreamByRange.h index e1141ebd299..e3d3f6f10ef 100644 --- a/src/Processors/Transforms/FilterSortedStreamByRange.h +++ b/src/Processors/Transforms/FilterSortedStreamByRange.h @@ -27,6 +27,9 @@ public: true) , filter_transform(header_, expression_, filter_column_name_, remove_filter_column_, on_totals_) { + assertBlocksHaveEqualStructure( + header_, getOutputPort().getHeader(), + "Expression for FilterSortedStreamByRange should not change header"); } String getName() const override { return "FilterSortedStreamByRange"; } diff --git a/src/Processors/Transforms/LimitByTransform.cpp b/src/Processors/Transforms/LimitByTransform.cpp index cb2804007bd..5e6d7dc816a 100644 --- a/src/Processors/Transforms/LimitByTransform.cpp +++ b/src/Processors/Transforms/LimitByTransform.cpp @@ -33,14 +33,11 @@ void LimitByTransform::transform(Chunk & chunk) for (UInt64 row = 0; row < num_rows; ++row) { - UInt128 key{}; SipHash hash; - for (auto position : key_positions) columns[position]->updateHashWithValue(row, hash); - hash.get128(key); - + const auto key = hash.get128(); auto count = keys_counts[key]++; if (count >= group_offset && (group_length > std::numeric_limits::max() - group_offset || count < group_length + group_offset)) diff --git a/src/Processors/Transforms/LimitPartialResultTransform.cpp b/src/Processors/Transforms/LimitPartialResultTransform.cpp new file mode 100644 index 00000000000..c9eaa9dc7dd --- /dev/null +++ b/src/Processors/Transforms/LimitPartialResultTransform.cpp @@ -0,0 +1,42 @@ +#include +#include + +namespace DB +{ + +LimitPartialResultTransform::LimitPartialResultTransform( + const Block & header, + UInt64 partial_result_limit_, + UInt64 partial_result_duration_ms_, + UInt64 limit_, + UInt64 offset_) + : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_) + , limit(limit_) + , offset(offset_) + {} + +void LimitPartialResultTransform::transformPartialResult(Chunk & chunk) +{ + UInt64 num_rows = chunk.getNumRows(); + if (num_rows < offset || limit == 0) + { + chunk = {}; + return; + } + + UInt64 length = std::min(limit, num_rows - offset); + + /// Check if some rows should be removed + if (length < num_rows) + { + UInt64 num_columns = chunk.getNumColumns(); + auto columns = chunk.detachColumns(); + + for (UInt64 i = 0; i < num_columns; ++i) + columns[i] = columns[i]->cut(offset, length); + + chunk.setColumns(std::move(columns), length); + } +} + +} diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h new file mode 100644 index 00000000000..3a0116b624d --- /dev/null +++ b/src/Processors/Transforms/LimitPartialResultTransform.h @@ -0,0 +1,36 @@ +#pragma once + +#include + +namespace DB +{ + +class LimitTransform; + +/// Currently supports only a single-threaded implementation with one input and one output port +class LimitPartialResultTransform : public PartialResultTransform +{ +public: + using LimitTransformPtr = 
std::shared_ptr; + + LimitPartialResultTransform( + const Block & header, + UInt64 partial_result_limit_, + UInt64 partial_result_duration_ms_, + UInt64 limit_, + UInt64 offset_); + + String getName() const override { return "LimitPartialResultTransform"; } + + void transformPartialResult(Chunk & chunk) override; + /// LimitTransform doesn't have a state that can be snapshotted + ShaphotResult getRealProcessorSnapshot() override { return {{}, SnaphotStatus::Stopped}; } + +private: + UInt64 limit; + UInt64 offset; + + LimitTransformPtr limit_transform; +}; + +} diff --git a/src/Processors/Transforms/LimitsCheckingTransform.cpp b/src/Processors/Transforms/LimitsCheckingTransform.cpp index 02d2fef808c..0557f3f291e 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 2f96a17c17b..eabb988dab6 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -33,6 +33,8 @@ public: void setQuota(const std::shared_ptr & quota_) { quota = quota_; } + PartialResultStatus getPartialResultProcessorSupportStatus() const override { return PartialResultStatus::SkipSupported; } + protected: void transform(Chunk & chunk) override; diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 46abd5c891d..5ee8d677dec 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -52,12 +52,12 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if (left_nullable && right_nullable) { - int res = left_column.compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); + int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); if (res) return res; /// NULL != NULL case - if (left_column.isNullAt(lhs_pos)) + if (left_nullable->isNullAt(lhs_pos)) return null_direction_hint; return 0; @@ -68,7 +68,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, { if (const auto * left_nullable = checkAndGetColumn(left_column)) { - if (left_column.isNullAt(lhs_pos)) + if (left_nullable->isNullAt(lhs_pos)) return null_direction_hint; return left_nullable->getNestedColumn().compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); } @@ -78,7 +78,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, { if (const auto * right_nullable = checkAndGetColumn(right_column)) { - if (right_column.isNullAt(rhs_pos)) + if (right_nullable->isNullAt(rhs_pos)) return -null_direction_hint; return left_column.compareAt(lhs_pos, rhs_pos, right_nullable->getNestedColumn(), null_direction_hint); } diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp new file mode 100644 index 00000000000..e4a2af2cdd8 --- /dev/null +++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp @@ -0,0 +1,48 @@ +#include + +namespace DB +{ + +MergeSortingPartialResultTransform::MergeSortingPartialResultTransform( + const Block & header, MergeSortingTransformPtr merge_sorting_transform_, + UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) + : PartialResultTransform(header, partial_result_limit_, 
partial_result_duration_ms_) + , merge_sorting_transform(std::move(merge_sorting_transform_)) + {} + +PartialResultTransform::ShaphotResult MergeSortingPartialResultTransform::getRealProcessorSnapshot() +{ + std::lock_guard lock(merge_sorting_transform->snapshot_mutex); + if (merge_sorting_transform->generated_prefix) + return {{}, SnaphotStatus::Stopped}; + + if (merge_sorting_transform->chunks.empty()) + return {{}, SnaphotStatus::NotReady}; + + /// Sort all input data + merge_sorting_transform->remerge(); + /// Add a copy of the first `partial_result_limit` rows to a generated_chunk + /// to send it later as a partial result in the next prepare stage of the current processor + auto generated_columns = merge_sorting_transform->chunks[0].cloneEmptyColumns(); + size_t total_rows = 0; + for (const auto & merged_chunk : merge_sorting_transform->chunks) + { + size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows); + if (rows == 0) + break; + + for (size_t position = 0; position < generated_columns.size(); ++position) + { + auto column = merged_chunk.getColumns()[position]; + generated_columns[position]->insertRangeFrom(*column, 0, rows); + } + + total_rows += rows; + } + + auto partial_result = Chunk(std::move(generated_columns), total_rows, merge_sorting_transform->chunks[0].getChunkInfo()); + merge_sorting_transform->enrichChunkWithConstants(partial_result); + return {std::move(partial_result), SnaphotStatus::Ready}; +} + +} diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.h b/src/Processors/Transforms/MergeSortingPartialResultTransform.h new file mode 100644 index 00000000000..781aa8e1265 --- /dev/null +++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeSortingPartialResultTransform : public PartialResultTransform +{ +public: + using MergeSortingTransformPtr = std::shared_ptr; + + MergeSortingPartialResultTransform( + const Block & header, MergeSortingTransformPtr merge_sorting_transform_, + UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + + String getName() const override { return "MergeSortingPartialResultTransform"; } + + /// MergeSortingTransform always receives chunks in a sorted state, so transformation is not needed + void transformPartialResult(Chunk & /*chunk*/) override {} + ShaphotResult getRealProcessorSnapshot() override; + +private: + MergeSortingTransformPtr merge_sorting_transform; +}; + +} diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index de77711d129..e801e5e16d5 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -136,6 +137,8 @@ void MergeSortingTransform::consume(Chunk chunk) /// If there were only const columns in sort description, then there is no need to sort. /// Return the chunk as is. + std::lock_guard lock(snapshot_mutex); + if (description.empty()) { generated_chunk = std::move(chunk); @@ -213,6 +216,8 @@ void MergeSortingTransform::serialize() void MergeSortingTransform::generate() { + std::lock_guard lock(snapshot_mutex); + if (!generated_prefix) { size_t num_tmp_files = tmp_data ? 
tmp_data->getStreams().size() : 0; @@ -273,4 +278,11 @@ void MergeSortingTransform::remerge() sum_bytes_in_blocks = new_sum_bytes_in_blocks; } +ProcessorPtr MergeSortingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +{ + const auto & header = inputs.front().getHeader(); + auto merge_sorting_processor = std::dynamic_pointer_cast(current_processor); + return std::make_shared(header, std::move(merge_sorting_processor), partial_result_limit, partial_result_duration_ms); +} + } diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index e8c180b6903..67f098b4362 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -33,6 +33,8 @@ public: String getName() const override { return "MergeSortingTransform"; } + PartialResultStatus getPartialResultProcessorSupportStatus() const override { return PartialResultStatus::FullSupported; } + protected: void consume(Chunk chunk) override; void serialize() override; @@ -40,6 +42,8 @@ protected: Processors expandPipeline() override; + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + private: size_t max_bytes_before_remerge; double remerge_lowered_memory_bytes_ratio; @@ -59,6 +63,13 @@ private: void remerge(); ProcessorPtr external_merging_sorted; + + friend class MergeSortingPartialResultTransform; + /// The mutex protects variables that are used for creating a snapshot of the current processor. + /// The current implementation of MergeSortingPartialResultTransform uses the 'generated_prefix' variable to check + /// whether the processor has started sending data through the main pipeline, and the corresponding partial result processor should stop creating snapshots. + /// Additionally, the mutex protects the 'chunks' variable and all variables in the 'remerge' function, which is used to transition 'chunks' to a sorted state. 
+ std::mutex snapshot_mutex; }; } diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp new file mode 100644 index 00000000000..97ff79dee54 --- /dev/null +++ b/src/Processors/Transforms/PartialResultTransform.cpp @@ -0,0 +1,80 @@ +#include + +namespace DB +{ + + +PartialResultTransform::PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) + : PartialResultTransform(header, header, partial_result_limit_, partial_result_duration_ms_) {} + +PartialResultTransform::PartialResultTransform(const Block & input_header, const Block & output_header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) + : IProcessor({input_header}, {output_header}) + , input(inputs.front()) + , output(outputs.front()) + , partial_result_limit(partial_result_limit_) + , partial_result_duration_ms(partial_result_duration_ms_) + , watch(CLOCK_MONOTONIC) + {} + +IProcessor::Status PartialResultTransform::prepare() +{ + if (output.isFinished()) + { + input.close(); + return Status::Finished; + } + + if (finished_getting_snapshots) + { + output.finish(); + return Status::Finished; + } + + if (!output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + + /// If input data from previous partial result processor is finished then + /// PartialResultTransform ready to create snapshots and send them as a partial result + if (input.isFinished()) + { + if (partial_result.snapshot_status == SnaphotStatus::Ready) + { + partial_result.snapshot_status = SnaphotStatus::NotReady; + output.push(std::move(partial_result.chunk)); + return Status::PortFull; + } + + return Status::Ready; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + partial_result.chunk = input.pull(); + transformPartialResult(partial_result.chunk); + if (partial_result.chunk.getNumRows() > 0) + { + output.push(std::move(partial_result.chunk)); + return Status::PortFull; + } + + return Status::NeedData; +} + +void PartialResultTransform::work() +{ + if (partial_result_duration_ms < watch.elapsedMilliseconds()) + { + partial_result = getRealProcessorSnapshot(); + if (partial_result.snapshot_status == SnaphotStatus::Stopped) + finished_getting_snapshots = true; + + watch.restart(); + } +} + +} diff --git a/src/Processors/Transforms/PartialResultTransform.h b/src/Processors/Transforms/PartialResultTransform.h new file mode 100644 index 00000000000..4fe87638f38 --- /dev/null +++ b/src/Processors/Transforms/PartialResultTransform.h @@ -0,0 +1,57 @@ +#pragma once + +#include + +namespace DB +{ + +/// Processors of this type are used to construct an auxiliary pipeline with processors corresponding to those in the main pipeline. +/// These processors work in two modes: +/// 1) Creating a snapshot of the corresponding processor from the main pipeline once per partial_result_duration_ms (period in milliseconds), and then sending the snapshot through the partial result pipeline. +/// 2) Transforming small blocks of data in the same way as the original processor and sending the transformed data through the partial result pipeline. +/// All processors of this type rely on the invariant that a new block from the previous processor of the partial result pipeline overwrites information about the previous block of the same previous processor. 
+class PartialResultTransform : public IProcessor +{ +public: + PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + PartialResultTransform(const Block & input_header, const Block & output_header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + + String getName() const override { return "PartialResultTransform"; } + + Status prepare() override; + void work() override; + + bool isPartialResultProcessor() const override { return true; } + +protected: + enum class SnaphotStatus + { + NotReady, // Waiting for data from the previous partial result processor or awaiting a timer before creating the snapshot. + Ready, // Current partial result processor has received a snapshot from the processor in the main pipeline. + Stopped, // The processor from the main pipeline has started sending data, and the pipeline for partial results should use data from the next processors of the main pipeline. + }; + + struct ShaphotResult + { + Chunk chunk; + SnaphotStatus snapshot_status; + }; + + InputPort & input; + OutputPort & output; + + UInt64 partial_result_limit; + UInt64 partial_result_duration_ms; + + ShaphotResult partial_result = {{}, SnaphotStatus::NotReady}; + + bool finished_getting_snapshots = false; + + virtual void transformPartialResult(Chunk & /*chunk*/) = 0; + virtual ShaphotResult getRealProcessorSnapshot() = 0; // { return {{}, SnaphotStatus::Stopped}; } + +private: + Stopwatch watch; +}; + +} diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 33c2d870b76..3fc9a4e71db 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -159,7 +159,7 @@ void PartialSortingTransform::transform(Chunk & chunk) { MutableColumnPtr sort_description_threshold_column_updated = raw_block_columns[i]->cloneEmpty(); sort_description_threshold_column_updated->insertFrom(*raw_block_columns[i], min_row_to_compare); - sort_description_threshold_columns_updated[i] = std::move(sort_description_threshold_column_updated); + sort_description_threshold_columns_updated[i] = sort_description_threshold_column_updated->convertToFullColumnIfSparse(); } sort_description_threshold_columns = std::move(sort_description_threshold_columns_updated); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index e89aec31655..7de9538e435 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index a785d52bf65..be76971ddcd 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -148,7 +148,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, const auto * reference_column = assert_cast( _reference_column); const auto offset = _offset.get(); - assert(offset >= 0); + chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7f7f9058f1b..34f02ba4ead 100644 --- 
a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -72,7 +72,7 @@ struct ViewsData std::atomic_bool has_exception = false; std::exception_ptr first_exception; - ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_ , StoragePtr source_storage_) + ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_, StoragePtr source_storage_) : thread_status_holder(std::move(thread_status_holder_)) , context(std::move(context_)) , source_storage_id(std::move(source_storage_id_)) @@ -281,7 +281,7 @@ Chain buildPushingToViewsChain( /// and switch back to the original thread_status. auto * original_thread = current_thread; SCOPE_EXIT({ current_thread = original_thread; }); - + current_thread = nullptr; std::unique_ptr view_thread_status_ptr = std::make_unique(/*check_current_thread_on_destruction=*/ false); /// Copy of a ThreadStatus should be internal. view_thread_status_ptr->setInternalThread(); @@ -432,6 +432,7 @@ Chain buildPushingToViewsChain( processors.emplace_back(std::move(finalizing_views)); result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); + result_chain.setConcurrencyControl(settings.use_concurrency_control); } if (auto * live_view = dynamic_cast(storage.get())) diff --git a/src/Processors/examples/CMakeLists.txt b/src/Processors/examples/CMakeLists.txt index 5d43a0d7d08..0c8734aee3c 100644 --- a/src/Processors/examples/CMakeLists.txt +++ b/src/Processors/examples/CMakeLists.txt @@ -2,3 +2,9 @@ if (TARGET ch_contrib::hivemetastore) clickhouse_add_executable (comma_separated_streams comma_separated_streams.cpp) target_link_libraries (comma_separated_streams PRIVATE dbms) endif() + +if (USE_ORC) + clickhouse_add_executable (native_orc native_orc.cpp) + target_link_libraries (native_orc PRIVATE dbms) + target_include_directories (native_orc PRIVATE ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include) +endif () diff --git a/src/Processors/examples/native_orc.cpp b/src/Processors/examples/native_orc.cpp new file mode 100644 index 00000000000..201e87b1f56 --- /dev/null +++ b/src/Processors/examples/native_orc.cpp @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +using namespace DB; + +int main() +{ + /// Read schema from orc file + String path = "/path/to/orc/file"; + // String path = "/data1/clickhouse_official/data/user_files/bigolive_audience_stats_orc.orc"; + { + ReadBufferFromFile in(path); + NativeORCSchemaReader schema_reader(in, {}); + auto schema = schema_reader.readSchema(); + std::cout << "schema:" << schema.toString() << std::endl; + } + + /// Read schema from string with orc data + { + ReadBufferFromFile in(path); + + String content; + WriteBufferFromString out(content); + + copyData(in, out); + + content.resize(out.count()); + ReadBufferFromString in2(content); + NativeORCSchemaReader schema_reader(in2, {}); + auto schema = schema_reader.readSchema(); + std::cout << "schema:" << schema.toString() << std::endl; + } + return 0; +} diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index 40718bd968a..ce5992c2548 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp 
@@ -29,7 +29,7 @@ TEST(Processors, PortsConnected) QueryStatusPtr element; PipelineExecutor executor(processors, element); - executor.execute(1); + executor.execute(1, false); } TEST(Processors, PortsNotConnected) @@ -55,7 +55,7 @@ TEST(Processors, PortsNotConnected) { QueryStatusPtr element; PipelineExecutor executor(processors, element); - executor.execute(1); + executor.execute(1, false); ASSERT_TRUE(false) << "Should have thrown."; } catch (DB::Exception & e) diff --git a/src/QueryPipeline/Chain.h b/src/QueryPipeline/Chain.h index 322e49d0d49..c093fc57ad3 100644 --- a/src/QueryPipeline/Chain.h +++ b/src/QueryPipeline/Chain.h @@ -29,6 +29,9 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + bool getConcurrencyControl() const { return concurrency_control; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + void addSource(ProcessorPtr processor); void addSink(ProcessorPtr processor); void appendChain(Chain chain); @@ -66,6 +69,7 @@ private: /// input port output port std::list processors; size_t num_threads = 0; + bool concurrency_control = false; }; } diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 91ba01c479f..293d152ea65 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { @@ -167,12 +168,9 @@ Pipe::Pipe(ProcessorPtr source) { checkSource(*source); - if (collected_processors) - collected_processors->emplace_back(source); - output_ports.push_back(&source->getOutputs().front()); header = output_ports.front()->getHeader(); - processors->emplace_back(std::move(source)); + addProcessor(std::move(source)); max_parallel_streams = 1; } @@ -319,6 +317,16 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow res.processors->insert(res.processors->end(), pipe.processors->begin(), pipe.processors->end()); res.output_ports.insert(res.output_ports.end(), pipe.output_ports.begin(), pipe.output_ports.end()); + if (res.isPartialResultActive() && pipe.isPartialResultActive()) + { + res.partial_result_ports.insert( + res.partial_result_ports.end(), + pipe.partial_result_ports.begin(), + pipe.partial_result_ports.end()); + } + else + res.dropPartialResult(); + res.max_parallel_streams += pipe.max_parallel_streams; if (pipe.totals_port) @@ -352,11 +360,11 @@ void Pipe::addSource(ProcessorPtr source) else assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - if (collected_processors) - collected_processors->emplace_back(source); - output_ports.push_back(&source->getOutputs().front()); - processors->emplace_back(std::move(source)); + if (isPartialResultActive()) + partial_result_ports.push_back(nullptr); + + addProcessor(std::move(source)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -374,11 +382,9 @@ void Pipe::addTotalsSource(ProcessorPtr source) assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - if (collected_processors) - collected_processors->emplace_back(source); - totals_port = &source->getOutputs().front(); - processors->emplace_back(std::move(source)); + + addProcessor(std::move(source)); } void Pipe::addExtremesSource(ProcessorPtr source) @@ -394,11 +400,20 @@ void Pipe::addExtremesSource(ProcessorPtr source) assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - if (collected_processors) - collected_processors->emplace_back(source); - 
extremes_port = &source->getOutputs().front(); - processors->emplace_back(std::move(source)); + + addProcessor(std::move(source)); +} + +void Pipe::activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) +{ + if (is_partial_result_active) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Partial result for Pipe should be initialized only once"); + + is_partial_result_active = true; + partial_result_limit = partial_result_limit_; + partial_result_duration_ms = partial_result_duration_ms_; + partial_result_ports.assign(output_ports.size(), nullptr); } static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) @@ -426,6 +441,15 @@ void Pipe::dropExtremes() dropPort(extremes_port, *processors, collected_processors); } +void Pipe::dropPartialResult() +{ + for (auto & port : partial_result_ports) + dropPort(port, *processors, collected_processors); + + is_partial_result_active = false; + partial_result_ports.clear(); +} + void Pipe::addTransform(ProcessorPtr transform) { addTransform(std::move(transform), static_cast(nullptr), static_cast(nullptr)); @@ -456,6 +480,8 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes) extremes_port = extremes; + addPartialResultTransform(transform); + size_t next_output = 0; for (auto & input : inputs) { @@ -506,10 +532,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes_port) assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes"); - if (collected_processors) - collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); + addProcessor(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -546,6 +569,8 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes_port = nullptr; } + addPartialResultTransform(transform); + bool found_totals = false; bool found_extremes = false; @@ -595,14 +620,104 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * if (extremes_port) assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes"); - if (collected_processors) - collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); + addProcessor(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } +void Pipe::addPartialResultSimpleTransform(const ProcessorPtr & transform, size_t partial_result_port_id) +{ + if (isPartialResultActive()) + { + auto & partial_result_port = partial_result_ports[partial_result_port_id]; + auto partial_result_status = transform->getPartialResultProcessorSupportStatus(); + + if (partial_result_status == IProcessor::PartialResultStatus::NotSupported) + dropPort(partial_result_port, *processors, collected_processors); + + if (partial_result_status != IProcessor::PartialResultStatus::FullSupported) + return; + + auto partial_result_transform = IProcessor::getPartialResultProcessorPtr(transform, partial_result_limit, partial_result_duration_ms); + + connectPartialResultPort(partial_result_port, partial_result_transform->getInputs().front()); + + partial_result_port = &partial_result_transform->getOutputs().front(); + + addProcessor(std::move(partial_result_transform)); + } +} + +void Pipe::addPartialResultTransform(const ProcessorPtr & transform) +{ + if (isPartialResultActive()) + { + size_t new_outputs_size = 
transform->getOutputs().size(); + auto partial_result_status = transform->getPartialResultProcessorSupportStatus(); + + if (partial_result_status == IProcessor::PartialResultStatus::SkipSupported && new_outputs_size != partial_result_ports.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot skip transform {} in the partial result part of the Pipe because it has {} output ports, but the partial result part expects {} output ports", + transform->getName(), + new_outputs_size, + partial_result_ports.size()); + + if (partial_result_status == IProcessor::PartialResultStatus::NotSupported) + { + for (auto & partial_result_port : partial_result_ports) + dropPort(partial_result_port, *processors, collected_processors); + + partial_result_ports.assign(new_outputs_size, nullptr); + } + + if (partial_result_status != IProcessor::PartialResultStatus::FullSupported) + return; + + auto partial_result_transform = IProcessor::getPartialResultProcessorPtr(transform, partial_result_limit, partial_result_duration_ms); + auto & inputs = partial_result_transform->getInputs(); + + if (inputs.size() != partial_result_ports.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot add partial result transform {} to Pipe because it has {} input ports, but {} expected", + partial_result_transform->getName(), + inputs.size(), + partial_result_ports.size()); + + size_t next_port = 0; + for (auto & input : inputs) + { + connectPartialResultPort(partial_result_ports[next_port], input); + ++next_port; + } + + partial_result_ports.assign(new_outputs_size, nullptr); + + next_port = 0; + for (auto & new_partial_result_port : partial_result_transform->getOutputs()) + { + partial_result_ports[next_port] = &new_partial_result_port; + ++next_port; + } + + addProcessor(std::move(partial_result_transform)); + } +} + +void Pipe::connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port) +{ + if (partial_result_port == nullptr) + { + auto source = std::make_shared(getHeader()); + partial_result_port = &source->getPort(); + + addProcessor(std::move(source)); + } + + connect(*partial_result_port, partial_result_transform_port); +} + void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) @@ -610,7 +725,7 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) Block new_header; - auto add_transform = [&](OutputPort *& port, StreamType stream_type) + auto add_transform = [&](OutputPort *& port, size_t partial_result_port_id, StreamType stream_type) { if (!port) return; @@ -646,19 +761,22 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { connect(*port, transform->getInputs().front()); port = &transform->getOutputs().front(); + if (stream_type == StreamType::Main) + addPartialResultSimpleTransform(transform, partial_result_port_id); - if (collected_processors) - collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); + addProcessor(std::move(transform)); } }; + size_t partial_result_port_id = 0; for (auto & port : output_ports) - add_transform(port, StreamType::Main); + { + add_transform(port, partial_result_port_id, StreamType::Main); + ++partial_result_port_id; + } - add_transform(totals_port, StreamType::Totals); - add_transform(extremes_port, StreamType::Extremes); + add_transform(totals_port, 0, StreamType::Totals); + add_transform(extremes_port, 0, StreamType::Extremes); header = std::move(new_header); } @@ -679,6 +797,7 @@ 
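The branching in `addPartialResultTransform` hinges on the processor's `PartialResultStatus`: an unsupported processor breaks the partial-result chain, a skip-supported one passes the existing ports through unchanged, and a fully supported one gets a mirror transform whose outputs become the new partial ports. A reduced model of that three-way decision, with hypothetical names rather than the real `IProcessor` interface:

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

enum class PartialResultStatus { NotSupported, SkipSupported, FullSupported };

// A partial-result "port" is just a tag here; std::nullopt means the chain is broken.
using PartialPort = std::optional<std::string>;

void applyTransform(std::vector<PartialPort> & partial_ports,
                    const std::string & transform_name,
                    PartialResultStatus status)
{
    switch (status)
    {
        case PartialResultStatus::NotSupported:
            // The transform cannot forward early results: drop every partial port.
            for (auto & port : partial_ports)
                port.reset();
            break;
        case PartialResultStatus::SkipSupported:
            // Ports are left untouched; the transform is simply skipped in the side chain.
            break;
        case PartialResultStatus::FullSupported:
            // A mirror transform is wired in; its outputs become the new partial ports.
            for (auto & port : partial_ports)
                port = transform_name + "_partial_output";
            break;
    }
}

int main()
{
    std::vector<PartialPort> ports(2, std::string("source_partial"));

    applyTransform(ports, "LimitTransform", PartialResultStatus::FullSupported);
    applyTransform(ports, "ResizeProcessor", PartialResultStatus::SkipSupported);
    applyTransform(ports, "AggregatingTransform", PartialResultStatus::NotSupported);

    for (const auto & port : ports)
        std::cout << (port ? *port : std::string("<dropped>")) << '\n';
}
```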
void Pipe::addChains(std::vector chains) dropTotals(); dropExtremes(); + dropPartialResult(); size_t max_parallel_streams_for_chains = 0; @@ -697,18 +816,21 @@ void Pipe::addChains(std::vector chains) auto added_processors = Chain::getProcessors(std::move(chains[i])); for (auto & transform : added_processors) - { - if (collected_processors) - collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); - } + addProcessor(std::move(transform)); } header = std::move(new_header); max_parallel_streams = std::max(max_parallel_streams, max_parallel_streams_for_chains); } +void Pipe::addProcessor(ProcessorPtr processor) +{ + if (collected_processors) + collected_processors->emplace_back(processor); + + processors->emplace_back(std::move(processor)); +} + void Pipe::resize(size_t num_streams, bool force, bool strict) { if (output_ports.empty()) @@ -769,6 +891,9 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) add_transform(totals_port, StreamType::Totals); add_transform(extremes_port, StreamType::Extremes); + for (auto & port : partial_result_ports) + add_transform(port, StreamType::PartialResult); + output_ports.clear(); header.clear(); } @@ -778,6 +903,9 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) if (output_ports.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot transform empty Pipe"); + /// TODO: Add functionality to work with partial result ports in transformer. + dropPartialResult(); + auto new_processors = transformer(output_ports); /// Create hash table with new processors. diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 09931e38578..70e933bcfd2 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -48,6 +48,9 @@ public: OutputPort * getOutputPort(size_t pos) const { return output_ports[pos]; } OutputPort * getTotalsPort() const { return totals_port; } OutputPort * getExtremesPort() const { return extremes_port; } + OutputPort * getPartialResultPort(size_t pos) const { return partial_result_ports.empty() ? nullptr : partial_result_ports[pos]; } + + bool isPartialResultActive() { return is_partial_result_active; } /// Add processor to list, add it output ports to output_ports. /// Processor shouldn't have input ports, output ports shouldn't be connected. @@ -58,9 +61,13 @@ public: void addTotalsSource(ProcessorPtr source); void addExtremesSource(ProcessorPtr source); - /// Drop totals and extremes (create NullSink for them). + /// Activate sending partial result during main pipeline execution + void activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + + /// Drop totals, extremes and partial result (create NullSink for them). void dropTotals(); void dropExtremes(); + void dropPartialResult(); /// Add processor to list. It should have size() input ports with compatible header. /// Output ports should have same headers. @@ -69,11 +76,16 @@ public: void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes); void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); + void addPartialResultTransform(const ProcessorPtr & transform); + void addPartialResultSimpleTransform(const ProcessorPtr & transform, size_t partial_result_port_id); + void connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port); + enum class StreamType { Main = 0, /// Stream for query data. There may be several streams of this type. 
Totals, /// Stream for totals. No more than one. Extremes, /// Stream for extremes. No more than one. + PartialResult, /// Stream for partial result data. There may be several streams of this type. }; using ProcessorGetter = std::function; @@ -109,10 +121,17 @@ private: Block header; std::shared_ptr processors; - /// Output ports. Totals and extremes are allowed to be empty. + /// If the variable is true, then each time a processor is added pipe will try + /// to add processor which will send partial result from original processor + bool is_partial_result_active = false; + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; + + /// Output ports. Totals, extremes and partial results are allowed to be empty. OutputPortRawPtrs output_ports; OutputPort * totals_port = nullptr; OutputPort * extremes_port = nullptr; + OutputPortRawPtrs partial_result_ports; /// It is the max number of processors which can be executed in parallel for each step. /// Usually, it's the same as the number of output ports. @@ -128,6 +147,8 @@ private: static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header); void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); + void addProcessor(ProcessorPtr processor); + friend class QueryPipelineBuilder; friend class QueryPipeline; }; diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 87a1408969e..9a836f68da1 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,8 @@ static void checkPulling( Processors & processors, OutputPort * output, OutputPort * totals, - OutputPort * extremes) + OutputPort * extremes, + OutputPort * partial_result) { if (!output || output->isConnected()) throw Exception( @@ -82,9 +84,15 @@ static void checkPulling( ErrorCodes::LOGICAL_ERROR, "Cannot create pulling QueryPipeline because its extremes port is connected"); + if (partial_result && partial_result->isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its partial_result port is connected"); + bool found_output = false; bool found_totals = false; bool found_extremes = false; + bool found_partial_result = false; for (const auto & processor : processors) { for (const auto & in : processor->getInputs()) @@ -98,6 +106,8 @@ static void checkPulling( found_totals = true; else if (extremes && &out == extremes) found_extremes = true; + else if (partial_result && &out == partial_result) + found_partial_result = true; else checkOutput(out, processor); } @@ -115,6 +125,10 @@ static void checkPulling( throw Exception( ErrorCodes::LOGICAL_ERROR, "Cannot create pulling QueryPipeline because its extremes port does not belong to any processor"); + if (partial_result && !found_partial_result) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its partial result port does not belong to any processor"); } static void checkCompleted(Processors & processors) @@ -164,7 +178,7 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) /// 5. Limit ... : Set counter on the input port of Limit /// Case 1. - if (typeid_cast(processor) && !limit_processor) + if ((typeid_cast(processor) || typeid_cast(processor)) && !limit_processor) { processors.emplace_back(processor); continue; @@ -199,7 +213,7 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) } /// Case 4. 
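The extended `checkPulling` accepts a pulling pipeline with a partial-result port only if that port is unconnected and is actually owned by one of the pipeline's processors. A self-contained sketch of that ownership check, using plain structs instead of the real processor classes:

```cpp
#include <iostream>
#include <stdexcept>
#include <vector>

struct OutputPort
{
    bool connected = false;
};

struct Processor
{
    std::vector<OutputPort *> outputs;
};

// Throws unless `partial_result` is absent, or unconnected and owned by some processor.
void checkPartialResultPort(const std::vector<Processor> & processors, const OutputPort * partial_result)
{
    if (!partial_result)
        return;

    if (partial_result->connected)
        throw std::logic_error("partial_result port is already connected");

    for (const auto & processor : processors)
        for (const auto * out : processor.outputs)
            if (out == partial_result)
                return;

    throw std::logic_error("partial_result port does not belong to any processor");
}

int main()
{
    OutputPort main_out, partial_out;
    std::vector<Processor> processors = {Processor{{&main_out, &partial_out}}};

    checkPartialResultPort(processors, &partial_out);   // OK
    std::cout << "partial result port validated\n";

    OutputPort foreign;
    try
    {
        checkPartialResultPort(processors, &foreign);   // not owned -> throws
    }
    catch (const std::exception & e)
    {
        std::cout << "rejected: " << e.what() << '\n';
    }
}
```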
- if (typeid_cast(processor)) + if (typeid_cast(processor) || typeid_cast(processor)) { processors.emplace_back(processor); limit_candidates[limit_processor].push_back(limit_input_port); @@ -317,17 +331,20 @@ QueryPipeline::QueryPipeline( std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_, - OutputPort * extremes_) + OutputPort * extremes_, + OutputPort * partial_result_) : resources(std::move(resources_)) , processors(std::move(processors_)) , output(output_) , totals(totals_) , extremes(extremes_) + , partial_result(partial_result_) { - checkPulling(*processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes, partial_result); } QueryPipeline::QueryPipeline(Pipe pipe) + : partial_result_duration_ms(pipe.partial_result_duration_ms) { if (pipe.numOutputPorts() > 0) { @@ -335,9 +352,10 @@ QueryPipeline::QueryPipeline(Pipe pipe) output = pipe.getOutputPort(0); totals = pipe.getTotalsPort(); extremes = pipe.getExtremesPort(); + partial_result = pipe.getPartialResultPort(0); processors = std::move(pipe.processors); - checkPulling(*processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes, partial_result); } else { @@ -369,6 +387,7 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); + auto & format_partial_result = format->getPort(IOutputFormat::PortKind::PartialResult); if (!totals) { @@ -384,12 +403,21 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) processors->emplace_back(std::move(source)); } + if (!partial_result) + { + auto source = std::make_shared(format_partial_result.getHeader()); + partial_result = &source->getPort(); + processors->emplace_back(std::move(source)); + } + connect(*totals, format_totals); connect(*extremes, format_extremes); + connect(*partial_result, format_partial_result); input = &format_main; totals = nullptr; extremes = nullptr; + partial_result = nullptr; output_format = format.get(); @@ -417,6 +445,7 @@ void QueryPipeline::complete(std::shared_ptr sink) drop(totals, *processors); drop(extremes, *processors); + drop(partial_result, *processors); connect(*output, sink->getPort()); processors->emplace_back(std::move(sink)); @@ -432,6 +461,7 @@ void QueryPipeline::complete(Chain chain) drop(totals, *processors); drop(extremes, *processors); + drop(partial_result, *processors); processors->reserve(processors->size() + chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) @@ -457,6 +487,7 @@ void QueryPipeline::complete(Pipe pipe) pipe.resize(1); pipe.dropExtremes(); pipe.dropTotals(); + pipe.dropPartialResult(); connect(*pipe.getOutputPort(0), *input); input = nullptr; @@ -485,11 +516,13 @@ void QueryPipeline::complete(std::shared_ptr format) addMaterializing(output, *processors); addMaterializing(totals, *processors); addMaterializing(extremes, *processors); + addMaterializing(partial_result, *processors); } auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); + auto & format_partial_result = format->getPort(IOutputFormat::PortKind::PartialResult); if (!totals) { @@ -505,13 +538,22 @@ void QueryPipeline::complete(std::shared_ptr format) 
processors->emplace_back(std::move(source)); } + if (!partial_result) + { + auto source = std::make_shared(format_partial_result.getHeader()); + partial_result = &source->getPort(); + processors->emplace_back(std::move(source)); + } + connect(*output, format_main); connect(*totals, format_totals); connect(*extremes, format_extremes); + connect(*partial_result, format_partial_result); output = nullptr; totals = nullptr; extremes = nullptr; + partial_result = nullptr; initRowsBeforeLimit(format.get()); output_format = format.get(); @@ -683,6 +725,7 @@ void QueryPipeline::convertStructureTo(const ColumnsWithTypeAndName & columns) addExpression(output, actions, *processors); addExpression(totals, actions, *processors); addExpression(extremes, actions, *processors); + addExpression(partial_result, actions, *processors); } std::unique_ptr QueryPipeline::getReadProgressCallback() const diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 0476b8e4bbf..20e58bc0f59 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -75,7 +75,8 @@ public: std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_ = nullptr, - OutputPort * extremes_ = nullptr); + OutputPort * extremes_ = nullptr, + OutputPort * partial_result_ = nullptr); bool initialized() const { return !processors->empty(); } /// When initialized, exactly one of the following is true. @@ -100,6 +101,9 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + bool getConcurrencyControl() const { return concurrency_control; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback); void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota_); @@ -151,12 +155,17 @@ private: OutputPort * output = nullptr; OutputPort * totals = nullptr; OutputPort * extremes = nullptr; + OutputPort * partial_result = nullptr; QueryStatusPtr process_list_element; IOutputFormat * output_format = nullptr; size_t num_threads = 0; + bool concurrency_control = false; + + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; friend class PushingPipelineExecutor; friend class PullingPipelineExecutor; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 553b18dd57b..90f5ee364f3 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -110,6 +110,16 @@ void QueryPipelineBuilder::init(QueryPipeline & pipeline) pipe.header = {}; } + if (pipeline.partial_result) + { + /// Set partial result ports only after activation because when activated, it is set to nullptr + pipe.activatePartialResult(pipeline.partial_result_limit, pipeline.partial_result_duration_ms); + pipe.partial_result_ports = {pipeline.partial_result}; + } + + if (!pipeline.partial_result) + pipe.dropPartialResult(); + pipe.totals_port = pipeline.totals; pipe.extremes_port = pipeline.extremes; pipe.max_parallel_streams = pipeline.num_threads; @@ -278,6 +288,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// Note: it may be > than settings.max_threads, so we should apply this limit again. 
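Throughout the patch `execute()` gains a second argument, so callers pass both the thread count and the new concurrency-control flag that the pipeline carries next to `num_threads` (visible in the `executor.execute(1, false)` test changes). A tiny illustration of that calling convention, with stand-in classes rather than the real `PipelineExecutor` and `QueryPipeline`:

```cpp
#include <cstddef>
#include <iostream>

// Stand-in for the executor: the real one schedules processors; this one only
// shows the (num_threads, concurrency_control) calling convention.
class ToyExecutor
{
public:
    void execute(size_t num_threads, bool concurrency_control)
    {
        std::cout << "executing with " << num_threads << " thread(s), "
                  << "concurrency control " << (concurrency_control ? "on" : "off") << '\n';
    }
};

struct ToyPipeline
{
    size_t num_threads = 1;
    bool concurrency_control = false;   // carried alongside num_threads, as in the patch

    void setNumThreads(size_t n) { num_threads = n; }
    void setConcurrencyControl(bool value) { concurrency_control = value; }
};

int main()
{
    ToyPipeline pipeline;
    pipeline.setNumThreads(4);
    pipeline.setConcurrencyControl(true);

    ToyExecutor executor;
    // Both settings travel together down to the execute() call.
    executor.execute(pipeline.num_threads, pipeline.concurrency_control);
}
```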
bool will_limit_max_threads = true; size_t max_threads = 0; + bool concurrency_control = false; Pipes pipes; QueryPlanResourceHolder resources; @@ -297,6 +308,8 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// It may happen if max_distributed_connections > max_threads if (pipeline.max_threads > max_threads_limit) max_threads_limit = pipeline.max_threads; + + concurrency_control = pipeline.getConcurrencyControl(); } QueryPipelineBuilder pipeline; @@ -307,6 +320,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( { pipeline.setMaxThreads(max_threads); pipeline.limitMaxThreads(max_threads_limit); + pipeline.setConcurrencyControl(concurrency_control); } pipeline.setCollectedProcessors(nullptr); @@ -348,6 +362,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); + /// TODO: Support joining of partial results from different pipelines. + left->pipe.dropPartialResult(); + right->pipe.dropPartialResult(); + left->pipe.dropExtremes(); right->pipe.dropExtremes(); if (left->getNumStreams() != 1 || right->getNumStreams() != 1) @@ -360,6 +378,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped auto joining = std::make_shared(join, inputs, out_header, max_block_size); + /// TODO: Support partial results in merge pipelines after joining support above. return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors); } @@ -380,6 +399,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe left->pipe.dropExtremes(); right->pipe.dropExtremes(); + /// TODO: Support joining of partial results from different pipelines. + left->pipe.dropPartialResult(); + right->pipe.dropPartialResult(); + left->pipe.collected_processors = collected_processors; /// Collect the NEW processors for the right pipeline. 
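In `unitePipelines` the builder now also picks up the concurrency-control flag from the pipelines being united, alongside the existing thread-count aggregation. A deliberately simplified sketch of that settings merge (toy structs; the real builder additionally clamps the thread count with `limitMaxThreads`):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

struct ToyPipeline
{
    size_t max_threads = 0;
    bool concurrency_control = false;
};

// Sums the requested threads and picks up the concurrency-control flag from the
// pipelines being united.
ToyPipeline uniteSettings(const std::vector<ToyPipeline> & pipelines)
{
    ToyPipeline united;
    for (const auto & pipeline : pipelines)
    {
        united.max_threads += pipeline.max_threads;
        united.concurrency_control = pipeline.concurrency_control;
    }
    return united;
}

int main()
{
    ToyPipeline united = uniteSettings({{4, false}, {2, true}});
    std::cout << "threads=" << united.max_threads
              << " concurrency_control=" << std::boolalpha << united.concurrency_control << '\n';
}
```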
@@ -579,6 +602,7 @@ void QueryPipelineBuilder::addCreatingSetsTransform( const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache) { + dropTotalsAndExtremes(); resize(1); auto transform = std::make_shared( @@ -589,12 +613,7 @@ void QueryPipelineBuilder::addCreatingSetsTransform( limits, std::move(prepared_sets_cache)); - InputPort * totals_port = nullptr; - - if (pipe.getTotalsPort()) - totals_port = transform->addTotalsPort(); - - pipe.addTransform(std::move(transform), totals_port, nullptr); + pipe.addTransform(std::move(transform)); } void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline) @@ -634,7 +653,7 @@ PipelineExecutorPtr QueryPipelineBuilder::execute() if (!isCompleted()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute pipeline because it is not completed"); - return std::make_shared(pipe.processors, process_list_element); + return std::make_shared(pipe.processors, process_list_element, pipe.partial_result_duration_ms); } Pipe QueryPipelineBuilder::getPipe(QueryPipelineBuilder pipeline, QueryPlanResourceHolder & resources) @@ -648,6 +667,7 @@ QueryPipeline QueryPipelineBuilder::getPipeline(QueryPipelineBuilder builder) QueryPipeline res(std::move(builder.pipe)); res.addResources(std::move(builder.resources)); res.setNumThreads(builder.getNumThreads()); + res.setConcurrencyControl(builder.getConcurrencyControl()); res.setProcessListElement(builder.process_list_element); res.setProgressCallback(builder.progress_callback); return res; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index e744e3612ce..612e7b1652f 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -85,6 +85,12 @@ public: /// Pipeline will be completed after this transformation. void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); + /// Activate building separate pipeline for sending partial result. + void activatePartialResult(UInt64 partial_result_limit, UInt64 partial_result_duration_ms) { pipe.activatePartialResult(partial_result_limit, partial_result_duration_ms); } + + /// Check if building of a pipeline for sending partial result active. + bool isPartialResultActive() { return pipe.isPartialResultActive(); } + /// Add totals which returns one chunk with single row with defaults. void addDefaultTotals(); @@ -183,6 +189,16 @@ public: max_threads = max_threads_; } + void setConcurrencyControl(bool concurrency_control_) + { + concurrency_control = concurrency_control_; + } + + bool getConcurrencyControl() + { + return concurrency_control; + } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } @@ -201,6 +217,8 @@ private: /// Sometimes, more streams are created then the number of threads for more optimal execution. 
size_t max_threads = 0; + bool concurrency_control = false; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index 0f50d56f1a5..4d7c7aa0f2a 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -63,6 +63,18 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c process_list_elem->updateProgressIn(total_rows_progress); } + size_t bytes = 0; + if ((bytes = total_bytes.exchange(0)) != 0) + { + Progress total_bytes_progress = {0, 0, 0, bytes}; + + if (progress_callback) + progress_callback(total_bytes_progress); + + if (process_list_elem) + process_list_elem->updateProgressIn(total_bytes_progress); + } + Progress value {read_rows, read_bytes}; if (progress_callback) diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index 08f2f9fc99b..5dbf3344bdf 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -23,6 +23,7 @@ public: void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; } void addTotalRowsApprox(size_t value) { total_rows_approx += value; } + void addTotalBytes(size_t value) { total_bytes += value; } /// Skip updating profile events. /// For merges in mutations it may need special logic, it's done inside ProgressCallback. @@ -37,6 +38,8 @@ private: /// The approximate total number of rows to read. For progress bar. std::atomic_size_t total_rows_approx = 0; + /// The total number of bytes to read. For progress bar. + std::atomic_size_t total_bytes = 0; std::mutex limits_and_quotas_mutex; Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index cd6f65b7b43..b834870c334 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -108,7 +108,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( , scalars(scalars_), external_tables(external_tables_), stage(stage_) , extension(extension_) { - create_connections = [this, pool, throttler, extension_](AsyncCallback async_callback)->std::unique_ptr + create_connections = [this, pool, throttler](AsyncCallback async_callback)->std::unique_ptr { const Settings & current_settings = context->getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); @@ -121,26 +121,32 @@ RemoteQueryExecutor::RemoteQueryExecutor( table_to_check = std::make_shared(main_table.getQualifiedName()); auto res = std::make_unique(pool, context, timeouts, throttler, pool_mode, table_to_check, std::move(async_callback)); - if (extension_ && extension_->replica_info) - res->setReplicaInfo(*extension_->replica_info); + if (extension && extension->replica_info) + res->setReplicaInfo(*extension->replica_info); return res; } #endif std::vector connection_entries; + std::optional skip_unavailable_endpoints; + if (extension && extension->parallel_reading_coordinator) + skip_unavailable_endpoints = true; + if (main_table) { - auto try_results = pool->getManyChecked(timeouts, ¤t_settings, pool_mode, main_table.getQualifiedName(), std::move(async_callback)); + auto try_results = pool->getManyChecked(timeouts, ¤t_settings, pool_mode, main_table.getQualifiedName(), std::move(async_callback), 
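The new `total_bytes` counter in `ReadProgressCallback` follows the same pattern as `total_rows_approx`: sources accumulate into an atomic, and the progress callback drains it with `exchange(0)` so each accumulated total is reported exactly once. A standalone sketch of that drain-once pattern:

```cpp
#include <atomic>
#include <cstddef>
#include <iostream>

std::atomic<size_t> total_bytes{0};

// Called by sources that learn how many bytes a task will read.
void addTotalBytes(size_t value)
{
    total_bytes += value;
}

// Called from the progress callback: report whatever has accumulated, exactly once.
void onProgress()
{
    size_t bytes = total_bytes.exchange(0);
    if (bytes != 0)
        std::cout << "new total bytes to read: " << bytes << '\n';
    else
        std::cout << "nothing new to report\n";
}

int main()
{
    addTotalBytes(1024);
    addTotalBytes(2048);
    onProgress();   // reports 3072 once
    onProgress();   // nothing new
}
```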
skip_unavailable_endpoints); connection_entries.reserve(try_results.size()); for (auto & try_result : try_results) connection_entries.emplace_back(std::move(try_result.entry)); } else - connection_entries = pool->getMany(timeouts, ¤t_settings, pool_mode, std::move(async_callback)); + { + connection_entries = pool->getMany(timeouts, ¤t_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints); + } auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); - if (extension_ && extension_->replica_info) - res->setReplicaInfo(*extension_->replica_info); + if (extension && extension->replica_info) + res->setReplicaInfo(*extension->replica_info); return res; }; } @@ -237,7 +243,7 @@ void RemoteQueryExecutor::sendQueryUnlocked(ClientInfo::QueryKind query_kind, As AsyncCallbackSetter async_callback_setter(connections.get(), async_callback); const auto & settings = context->getSettingsRef(); - if (needToSkipUnavailableShard()) + if (isReplicaUnavailable() || needToSkipUnavailableShard()) { /// To avoid sending the query again in the read(), we need to update the following flags: was_cancelled = true; @@ -363,7 +369,7 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync() read_context->resume(); - if (needToSkipUnavailableShard()) + if (isReplicaUnavailable() || needToSkipUnavailableShard()) { /// We need to tell the coordinator not to wait for this replica. /// But at this point it may lead to an incomplete result set, because @@ -438,9 +444,9 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::processPacket(Packet packet processMergeTreeReadTaskRequest(packet.request.value()); return ReadResult(ReadResult::Type::ParallelReplicasToken); - case Protocol::Server::MergeTreeAllRangesAnnounecement: + case Protocol::Server::MergeTreeAllRangesAnnouncement: chassert(packet.announcement.has_value()); - processMergeTreeInitialReadAnnounecement(packet.announcement.value()); + processMergeTreeInitialReadAnnouncement(packet.announcement.value()); return ReadResult(ReadResult::Type::ParallelReplicasToken); case Protocol::Server::ReadTaskRequest: @@ -562,7 +568,7 @@ void RemoteQueryExecutor::processMergeTreeReadTaskRequest(ParallelReadRequest re connections->sendMergeTreeReadTaskResponse(response); } -void RemoteQueryExecutor::processMergeTreeInitialReadAnnounecement(InitialAllRangesAnnouncement announcement) +void RemoteQueryExecutor::processMergeTreeInitialReadAnnouncement(InitialAllRangesAnnouncement announcement) { if (!extension || !extension->parallel_reading_coordinator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Coordinator for parallel reading from replicas is not initialized"); @@ -591,44 +597,56 @@ void RemoteQueryExecutor::finish() /// Send the request to abort the execution of the request, if not already sent. tryCancel("Cancelling query because enough data has been read"); - /// If connections weren't created yet or query wasn't sent, nothing to do. - if (!connections || !sent_query) + /// If connections weren't created yet, query wasn't sent or was already finished, nothing to do. + if (!connections || !sent_query || finished) return; /// Get the remaining packets so that there is no out of sync in the connections to the replicas. 
- Packet packet = connections->drain(); - switch (packet.type) + /// We do this manually instead of calling drain() because we want to process Log, ProfileEvents and Progress + /// packets that had been sent before the connection is fully finished in order to have final statistics of what + /// was executed in the remote queries + while (connections->hasActiveConnections() && !finished) { - case Protocol::Server::EndOfStream: - finished = true; - break; + Packet packet = connections->receivePacket(); - case Protocol::Server::Log: - /// Pass logs from remote server to client - if (auto log_queue = CurrentThread::getInternalTextLogsQueue()) - log_queue->pushBlock(std::move(packet.block)); - break; + switch (packet.type) + { + case Protocol::Server::EndOfStream: + finished = true; + break; - case Protocol::Server::Exception: - got_exception_from_replica = true; - packet.exception->rethrow(); - break; + case Protocol::Server::Exception: + got_exception_from_replica = true; + packet.exception->rethrow(); + break; - case Protocol::Server::ProfileEvents: - /// Pass profile events from remote server to client - if (auto profile_queue = CurrentThread::getInternalProfileEventsQueue()) - if (!profile_queue->emplace(std::move(packet.block))) - throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push into profile queue"); - break; + case Protocol::Server::Log: + /// Pass logs from remote server to client + if (auto log_queue = CurrentThread::getInternalTextLogsQueue()) + log_queue->pushBlock(std::move(packet.block)); + break; - case Protocol::Server::TimezoneUpdate: - break; + case Protocol::Server::ProfileEvents: + /// Pass profile events from remote server to client + if (auto profile_queue = CurrentThread::getInternalProfileEventsQueue()) + if (!profile_queue->emplace(std::move(packet.block))) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push into profile queue"); + break; - default: - got_unknown_packet_from_replica = true; - throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", - toString(packet.type), - connections->dumpAddresses()); + case Protocol::Server::ProfileInfo: + /// Use own (client-side) info about read bytes, it is more correct info than server-side one. + if (profile_info_callback) + profile_info_callback(packet.profile_info); + break; + + case Protocol::Server::Progress: + if (progress_callback) + progress_callback(packet.progress); + break; + + default: + break; + } } } diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index a843ce520de..8d834eb3f81 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -46,9 +46,9 @@ public: /// decide whether to deny or to accept that request. struct Extension { - std::shared_ptr task_iterator; - std::shared_ptr parallel_reading_coordinator; - std::optional replica_info; + std::shared_ptr task_iterator = nullptr; + std::shared_ptr parallel_reading_coordinator = nullptr; + std::optional replica_info = {}; }; /// Takes already set connection. 
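Instead of a single `drain()`, `finish()` now loops over the remaining packets itself so trailing Log, ProfileEvents, ProfileInfo and Progress packets still reach their callbacks before the connection is considered finished. A reduced model of that drain loop, with hypothetical packet and queue types rather than the real connection classes:

```cpp
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>

enum class PacketType { Progress, Log, ProfileEvents, EndOfStream, Exception };

struct Packet
{
    PacketType type;
    std::string payload;
};

void drainRemaining(std::queue<Packet> & packets)
{
    bool finished = false;
    while (!packets.empty() && !finished)
    {
        Packet packet = packets.front();
        packets.pop();

        switch (packet.type)
        {
            case PacketType::EndOfStream:
                finished = true;
                break;
            case PacketType::Exception:
                throw std::runtime_error(packet.payload);
            case PacketType::Log:
            case PacketType::ProfileEvents:
            case PacketType::Progress:
                // Late statistics are still forwarded instead of being silently dropped.
                std::cout << "forwarding late packet: " << packet.payload << '\n';
                break;
        }
    }
}

int main()
{
    std::queue<Packet> packets;
    packets.push({PacketType::Progress, "rows=100"});
    packets.push({PacketType::ProfileEvents, "NetworkReceiveBytes=4096"});
    packets.push({PacketType::EndOfStream, ""});

    drainRemaining(packets);
    std::cout << "connection drained\n";
}
```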
@@ -186,6 +186,8 @@ public: bool needToSkipUnavailableShard() const { return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); } + bool isReplicaUnavailable() const { return extension && extension->parallel_reading_coordinator && connections->size() == 0; } + private: RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, @@ -283,7 +285,7 @@ private: void processReadTaskRequest(); void processMergeTreeReadTaskRequest(ParallelReadRequest request); - void processMergeTreeInitialReadAnnounecement(InitialAllRangesAnnouncement announcement); + void processMergeTreeInitialReadAnnouncement(InitialAllRangesAnnouncement announcement); /// Cancel query and restart it with info about duplicate UUIDs /// only for `allow_experimental_query_deduplication`. diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 06d4a8cef87..5e211bf036d 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -76,7 +76,7 @@ bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking) epoll_event events[3]; events[0].data.fd = events[1].data.fd = events[2].data.fd = -1; - size_t num_events = epoll.getManyReady(3, events, blocking); + size_t num_events = epoll.getManyReady(3, events, blocking ? -1 : 0); bool is_socket_ready = false; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 67d30012b0e..77a5369252e 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -833,7 +833,7 @@ namespace { settings_changes.push_back({key, value}); } - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); query_context->applySettingsChanges(settings_changes); query_context->setCurrentQueryId(query_info.query_id()); @@ -1118,7 +1118,7 @@ namespace SettingsChanges settings_changes; for (const auto & [key, value] : external_table.settings()) settings_changes.push_back({key, value}); - external_table_context->checkSettingsConstraints(settings_changes); + external_table_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( @@ -1134,7 +1134,7 @@ namespace }); auto executor = cur_pipeline.execute(); - executor->execute(1); + executor->execute(1, false); } } diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index f5b7a70dc79..236a56e2323 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -5,9 +5,9 @@ #include #include -#include #include + namespace DB { diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index c8015cfd185..62f492fa0a9 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { @@ -29,28 +29,32 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() } } -void WriteBufferFromHTTPServerResponse::writeHeaderSummary() +void WriteBufferFromHTTPServerResponse::writeHeaderProgressImpl(const char * header_name) { if (headers_finished_sending) return; WriteBufferFromOwnString progress_string_writer; - accumulated_progress.writeJSON(progress_string_writer); + + writeCString("{", 
progress_string_writer); + accumulated_progress.writeJSON(progress_string_writer, false); + writeCString(",\"peak_memory_usage\":\"", progress_string_writer); + writeText(peak_memory_usage, progress_string_writer); + writeCString("\"}", progress_string_writer); if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush; + *response_header_ostr << header_name << progress_string_writer.str() << "\r\n" << std::flush; +} + +void WriteBufferFromHTTPServerResponse::writeHeaderSummary() +{ + accumulated_progress.incrementElapsedNs(progress_watch.elapsed()); + writeHeaderProgressImpl("X-ClickHouse-Summary: "); } void WriteBufferFromHTTPServerResponse::writeHeaderProgress() { - if (headers_finished_sending) - return; - - WriteBufferFromOwnString progress_string_writer; - accumulated_progress.writeJSON(progress_string_writer); - - if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Progress: " << progress_string_writer.str() << "\r\n" << std::flush; + writeHeaderProgressImpl("X-ClickHouse-Progress: "); } void WriteBufferFromHTTPServerResponse::writeExceptionCode() @@ -149,7 +153,7 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( } -void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) +void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress, Int64 peak_memory_usage_) { std::lock_guard lock(mutex); @@ -158,9 +162,10 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) return; accumulated_progress.incrementPiecewiseAtomically(progress); - + peak_memory_usage = peak_memory_usage_; if (send_progress && progress_watch.elapsed() >= send_progress_interval_ms * 1000000) { + accumulated_progress.incrementElapsedNs(progress_watch.elapsed()); progress_watch.restart(); /// Send all common headers before our special progress headers. diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index ce677616755..b4c66357d3b 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -43,7 +43,7 @@ public: ~WriteBufferFromHTTPServerResponse() override; /// Writes progress in repeating HTTP headers. - void onProgress(const Progress & progress); + void onProgress(const Progress & progress, Int64 peak_memory_usage_); /// Turn compression on or off. /// The setting has any effect only if HTTP headers haven't been sent yet. @@ -89,6 +89,8 @@ private: /// but not finish them with \r\n, allowing to send more headers subsequently. void startSendHeaders(); + // Used for write the header X-ClickHouse-Progress / X-ClickHouse-Summary + void writeHeaderProgressImpl(const char * header_name); // Used for write the header X-ClickHouse-Progress void writeHeaderProgress(); // Used for write the header X-ClickHouse-Summary @@ -126,6 +128,8 @@ private: int exception_code = 0; + Int64 peak_memory_usage = 0; + std::mutex mutex; /// progress callback could be called from different threads. 
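Both `X-ClickHouse-Progress` and `X-ClickHouse-Summary` now go through one helper that wraps the progress JSON and appends a `peak_memory_usage` field. A minimal sketch of assembling such a header value with plain string building in place of the WriteBuffer helpers; only a couple of representative progress fields are shown, the real progress JSON carries more:

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Builds a header value in the spirit of the shared helper: progress fields first,
// then the peak memory usage appended as its own field.
std::string makeProgressHeaderValue(uint64_t read_rows, uint64_t read_bytes, int64_t peak_memory_usage)
{
    std::ostringstream out;
    out << '{'
        << "\"read_rows\":\"" << read_rows << "\","
        << "\"read_bytes\":\"" << read_bytes << "\","
        << "\"peak_memory_usage\":\"" << peak_memory_usage << "\""
        << '}';
    return out.str();
}

int main()
{
    // The handler would emit, e.g.:  X-ClickHouse-Progress: {...}
    std::cout << "X-ClickHouse-Progress: "
              << makeProgressHeaderValue(1000, 65536, 123456789) << '\n';
}
```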
}; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 069670c84a5..f4deed8715e 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -59,6 +59,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_PARSE_TEXT; extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; @@ -69,15 +70,19 @@ namespace ErrorCodes extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; extern const int CANNOT_PARSE_IPV4; extern const int CANNOT_PARSE_IPV6; + extern const int CANNOT_PARSE_UUID; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int CANNOT_OPEN_FILE; extern const int CANNOT_COMPILE_REGEXP; - + extern const int DUPLICATE_COLUMN; + extern const int ILLEGAL_COLUMN; + extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_ELEMENT_IN_AST; extern const int UNKNOWN_TYPE_OF_AST_NODE; extern const int TOO_DEEP_AST; extern const int TOO_BIG_AST; extern const int UNEXPECTED_AST_STRUCTURE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; extern const int SYNTAX_ERROR; @@ -188,7 +193,9 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti { return HTTPResponse::HTTP_FORBIDDEN; } - else if (exception_code == ErrorCodes::CANNOT_PARSE_TEXT || + else if (exception_code == ErrorCodes::BAD_ARGUMENTS || + exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || + exception_code == ErrorCodes::CANNOT_PARSE_TEXT || exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || exception_code == ErrorCodes::CANNOT_PARSE_DATE || @@ -198,14 +205,19 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || + exception_code == ErrorCodes::CANNOT_PARSE_UUID || + exception_code == ErrorCodes::DUPLICATE_COLUMN || + exception_code == ErrorCodes::ILLEGAL_COLUMN || exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || + exception_code == ErrorCodes::THERE_IS_NO_COLUMN || exception_code == ErrorCodes::TOO_DEEP_AST || exception_code == ErrorCodes::TOO_BIG_AST || exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || exception_code == ErrorCodes::SYNTAX_ERROR || exception_code == ErrorCodes::INCORRECT_DATA || - exception_code == ErrorCodes::TYPE_MISMATCH) + exception_code == ErrorCodes::TYPE_MISMATCH || + exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) { return HTTPResponse::HTTP_BAD_REQUEST; } @@ -561,8 +573,7 @@ void HTTPHandler::processQuery( session->makeSessionContext(); } - auto client_info = session->getClientInfo(); - auto context = session->makeQueryContext(std::move(client_info)); + auto context = session->makeQueryContext(); /// This parameter is used to tune the behavior of output formats (such as Native) for compatibility. if (params.has("client_protocol_version")) @@ -596,7 +607,7 @@ void HTTPHandler::processQuery( size_t buffer_size_http = DBMS_DEFAULT_BUFFER_SIZE; size_t buffer_size_memory = (buffer_size_total > buffer_size_http) ? 
buffer_size_total : 0; - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); used_output.out = std::make_shared( response, @@ -638,7 +649,7 @@ void HTTPHandler::processQuery( throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected MemoryWriteBuffer"); auto rdbuf = prev_memory_buffer->tryGetReadBuffer(); - copyData(*rdbuf , *next_buffer); + copyData(*rdbuf, *next_buffer); return next_buffer; }; @@ -764,7 +775,7 @@ void HTTPHandler::processQuery( context->setDefaultFormat(default_format); /// For external data we also want settings - context->checkSettingsConstraints(settings_changes); + context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. @@ -815,7 +826,11 @@ void HTTPHandler::processQuery( /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` - append_callback([&used_output](const Progress & progress) { used_output.out->onProgress(progress); }); + append_callback([&used_output](const Progress & progress) + { + const auto& thread_group = CurrentThread::getGroup(); + used_output.out->onProgress(progress, thread_group->memory_tracker.getPeak()); + }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 78e374ee9e0..1c911034da1 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -132,21 +132,25 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS auto ping_handler = std::make_shared>(server, ping_response_expression); ping_handler->attachStrictPath("/ping"); ping_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/ping"); factory.addHandler(ping_handler); auto replicas_status_handler = std::make_shared>(server); replicas_status_handler->attachNonStrictPath("/replicas_status"); replicas_status_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/replicas_status"); factory.addHandler(replicas_status_handler); auto play_handler = std::make_shared>(server); play_handler->attachNonStrictPath("/play"); play_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/play"); factory.addHandler(play_handler); auto dashboard_handler = std::make_shared>(server); dashboard_handler->attachNonStrictPath("/dashboard"); dashboard_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/dashboard"); factory.addHandler(dashboard_handler); auto js_handler = std::make_shared>(server); diff --git a/src/Server/HTTPPathHints.cpp b/src/Server/HTTPPathHints.cpp new file mode 100644 index 00000000000..51ef3eabffe --- /dev/null +++ b/src/Server/HTTPPathHints.cpp @@ -0,0 +1,16 @@ +#include + +namespace DB +{ + +void HTTPPathHints::add(const String & http_path) +{ + http_paths.push_back(http_path); +} + +std::vector HTTPPathHints::getAllRegisteredNames() const +{ + return http_paths; +} + +} diff --git a/src/Server/HTTPPathHints.h b/src/Server/HTTPPathHints.h new file mode 100644 index 00000000000..fe6feebb727 --- /dev/null +++ b/src/Server/HTTPPathHints.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class HTTPPathHints : public IHints<> +{ 
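The `exceptionCodeToHTTPStatus` additions above widen the set of "client mistake" error codes that map to HTTP 400 instead of 500. A simplified model of that mapping, using a toy enum with arbitrary values rather than ClickHouse's real error-code numbers:

```cpp
#include <iostream>

// Toy stand-ins for a few of the error codes routed to HTTP 400 by the handler;
// the enum and its values are illustrative only.
enum class ToyErrorCode
{
    BAD_ARGUMENTS,
    CANNOT_COMPILE_REGEXP,
    ILLEGAL_COLUMN,
    THERE_IS_NO_COLUMN,
    VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
    UNKNOWN
};

int toHTTPStatus(ToyErrorCode code)
{
    switch (code)
    {
        case ToyErrorCode::BAD_ARGUMENTS:
        case ToyErrorCode::CANNOT_COMPILE_REGEXP:
        case ToyErrorCode::ILLEGAL_COLUMN:
        case ToyErrorCode::THERE_IS_NO_COLUMN:
        case ToyErrorCode::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE:
            return 400;   // client-side mistakes become HTTP_BAD_REQUEST
        default:
            return 500;   // everything unclassified stays an internal error
    }
}

int main()
{
    std::cout << toHTTPStatus(ToyErrorCode::ILLEGAL_COLUMN) << '\n';   // 400
    std::cout << toHTTPStatus(ToyErrorCode::UNKNOWN) << '\n';          // 500
}
```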
+public: + std::vector getAllRegisteredNames() const override; + void add(const String & http_path); + +private: + std::vector http_paths; +}; + +using HTTPPathHintsPtr = std::shared_ptr; + +} diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp index 61a2909d30f..5481bcd5083 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.cpp +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -29,7 +29,7 @@ std::unique_ptr HTTPRequestHandlerFactoryMain::createRequest || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { - return std::unique_ptr(new NotFoundHandler); + return std::unique_ptr(new NotFoundHandler(hints.getHints(request.getURI()))); } return nullptr; diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h index b0e57bd6b3b..07b278d831c 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.h +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -15,11 +16,14 @@ public: void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } + void addPathToHints(const std::string & http_path) { hints.add(http_path); } + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; std::string name; + HTTPPathHints hints; std::vector child_factories; }; diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 9741592868a..5f6da208778 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -88,7 +88,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe Output used_output; const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); used_output.out = std::make_shared( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index e3edc281e83..58d227a5ae5 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -175,10 +175,13 @@ struct SocketInterruptablePollWrapper } while (rc < 0 && errno == POCO_EINTR); - if (rc >= 1 && poll_buf[0].revents & POLLIN) - socket_ready = true; - if (rc >= 2 && poll_buf[1].revents & POLLIN) - fd_ready = true; + if (rc >= 1) + { + if (poll_buf[0].revents & POLLIN) + socket_ready = true; + if (poll_buf[1].revents & POLLIN) + fd_ready = true; + } #endif } diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 3181708b9b7..5b1db508551 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -10,7 +10,8 @@ void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerRespo try { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); - *response.send() << "There is no handle " << request.getURI() << "\n\n" + *response.send() << "There is no handle " << request.getURI() + << (!hints.empty() ? fmt::format(". 
Maybe you meant {}.", hints.front()) : "") << "\n\n" << "Use / or /ping for health checks.\n" << "Or /replicas_status for more sophisticated health checks.\n\n" << "Send queries from your program with POST method or GET /?query=...\n\n" diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index 749ac388c4d..1cbfcd57f8f 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -9,7 +9,10 @@ namespace DB class NotFoundHandler : public HTTPRequestHandler { public: + NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; +private: + std::vector hints; }; } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 79025624206..357c39e30d2 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,7 +18,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe try { const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); setResponseDefaultHeaders(response, keep_alive_timeout); diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 915b6265993..8d14a849894 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC && !defined(CLICKHOUSE_PROGRAM_STANDALONE_BUILD) +#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC && !defined(CLICKHOUSE_PROGRAM_STANDALONE_BUILD) +#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index e08b12e67f2..dd11c1dfc58 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -23,7 +23,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC && !defined(CLICKHOUSE_PROGRAM_STANDALONE_BUILD) +#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 8c0ab0c1a3b..ad54b24f31d 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -79,7 +79,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } const auto & config = getContext()->getConfigRef(); - setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", 10)); + setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT)); if (!ok) { diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp new file mode 100644 index 00000000000..fb052e7d6e6 --- /dev/null +++ b/src/Server/ServerType.cpp @@ -0,0 +1,153 @@ +#include + +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + std::vector 
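The registered handler paths (`/ping`, `/replicas_status`, `/play`, `/dashboard`) now feed a hint list, and the 404 handler surfaces the closest match in its "Maybe you meant" message. A standalone matcher written under the assumption that hints are ranked by edit distance; the real `IHints`/`getHints` machinery is not reproduced here:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Classic dynamic-programming edit distance.
size_t editDistance(const std::string & a, const std::string & b)
{
    std::vector<size_t> prev(b.size() + 1), cur(b.size() + 1);
    for (size_t j = 0; j <= b.size(); ++j)
        prev[j] = j;
    for (size_t i = 1; i <= a.size(); ++i)
    {
        cur[0] = i;
        for (size_t j = 1; j <= b.size(); ++j)
        {
            size_t cost = (a[i - 1] == b[j - 1]) ? 0 : 1;
            cur[j] = std::min({prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + cost});
        }
        std::swap(prev, cur);
    }
    return prev[b.size()];
}

struct PathHints
{
    std::vector<std::string> paths;

    void add(const std::string & path) { paths.push_back(path); }

    // Returns the registered path closest to the requested one, if any is close enough.
    std::string bestHint(const std::string & requested, size_t max_distance = 3) const
    {
        std::string best;
        size_t best_distance = max_distance + 1;
        for (const auto & path : paths)
        {
            size_t d = editDistance(requested, path);
            if (d < best_distance)
            {
                best_distance = d;
                best = path;
            }
        }
        return best;
    }
};

int main()
{
    PathHints hints;
    hints.add("/ping");
    hints.add("/replicas_status");
    hints.add("/play");
    hints.add("/dashboard");

    std::string requested = "/pnig";
    std::string hint = hints.bestHint(requested);
    std::cout << "There is no handle " << requested
              << (hint.empty() ? "" : ". Maybe you meant " + hint + ".") << '\n';
}
```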
getTypeIndexToTypeName() + { + constexpr std::size_t types_size = magic_enum::enum_count(); + + std::vector type_index_to_type_name; + type_index_to_type_name.resize(types_size); + + auto entries = magic_enum::enum_entries(); + for (const auto & [entry, str] : entries) + { + auto str_copy = String(str); + std::replace(str_copy.begin(), str_copy.end(), '_', ' '); + type_index_to_type_name[static_cast(entry)] = std::move(str_copy); + } + + return type_index_to_type_name; + } +} + +const char * ServerType::serverTypeToString(ServerType::Type type) +{ + /** During parsing if SystemQuery is not parsed properly it is added to Expected variants as description check IParser.h. + * Description string must be statically allocated. + */ + static std::vector type_index_to_type_name = getTypeIndexToTypeName(); + const auto & type_name = type_index_to_type_name[static_cast(type)]; + return type_name.data(); +} + +bool ServerType::shouldStart(Type server_type, const std::string & server_custom_name) const +{ + auto is_type_default = [](Type current_type) + { + switch (current_type) + { + case Type::TCP: + case Type::TCP_WITH_PROXY: + case Type::TCP_SECURE: + case Type::HTTP: + case Type::HTTPS: + case Type::MYSQL: + case Type::GRPC: + case Type::POSTGRESQL: + case Type::PROMETHEUS: + case Type::INTERSERVER_HTTP: + case Type::INTERSERVER_HTTPS: + return true; + default: + return false; + } + }; + + if (exclude_types.contains(Type::QUERIES_ALL)) + return false; + + if (exclude_types.contains(Type::QUERIES_DEFAULT) && is_type_default(server_type)) + return false; + + if (exclude_types.contains(Type::QUERIES_CUSTOM) && server_type == Type::CUSTOM) + return false; + + if (exclude_types.contains(server_type)) + { + if (server_type != Type::CUSTOM) + return false; + + if (exclude_custom_names.contains(server_custom_name)) + return false; + } + + if (type == Type::QUERIES_ALL) + return true; + + if (type == Type::QUERIES_DEFAULT) + return is_type_default(server_type); + + if (type == Type::QUERIES_CUSTOM) + return server_type == Type::CUSTOM; + + if (type == Type::CUSTOM) + return server_type == type && server_custom_name == custom_name; + + return server_type == type; +} + +bool ServerType::shouldStop(const std::string & port_name) const +{ + Type port_type; + std::string port_custom_name; + + if (port_name == "http_port") + port_type = Type::HTTP; + + else if (port_name == "https_port") + port_type = Type::HTTPS; + + else if (port_name == "tcp_port") + port_type = Type::TCP; + + else if (port_name == "tcp_with_proxy_port") + port_type = Type::TCP_WITH_PROXY; + + else if (port_name == "tcp_port_secure") + port_type = Type::TCP_SECURE; + + else if (port_name == "mysql_port") + port_type = Type::MYSQL; + + else if (port_name == "postgresql_port") + port_type = Type::POSTGRESQL; + + else if (port_name == "grpc_port") + port_type = Type::GRPC; + + else if (port_name == "prometheus.port") + port_type = Type::PROMETHEUS; + + else if (port_name == "interserver_http_port") + port_type = Type::INTERSERVER_HTTP; + + else if (port_name == "interserver_https_port") + port_type = Type::INTERSERVER_HTTPS; + + else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) + { + port_type = Type::CUSTOM; + + constexpr size_t protocols_size = std::string_view("protocols.").size(); + constexpr size_t ports_size = std::string_view(".ports").size(); + + port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - ports_size + 1); + } + + else + return false; + + return 
shouldStart(port_type, port_custom_name); +} + +} diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h new file mode 100644 index 00000000000..e3544fe6a28 --- /dev/null +++ b/src/Server/ServerType.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ServerType +{ +public: + enum Type + { + TCP, + TCP_WITH_PROXY, + TCP_SECURE, + HTTP, + HTTPS, + MYSQL, + GRPC, + POSTGRESQL, + PROMETHEUS, + CUSTOM, + INTERSERVER_HTTP, + INTERSERVER_HTTPS, + QUERIES_ALL, + QUERIES_DEFAULT, + QUERIES_CUSTOM, + END + }; + + using Types = std::unordered_set; + using CustomNames = std::unordered_set; + + ServerType() = default; + + explicit ServerType( + Type type_, + const std::string & custom_name_ = "", + const Types & exclude_types_ = {}, + const CustomNames exclude_custom_names_ = {}) + : type(type_), + custom_name(custom_name_), + exclude_types(exclude_types_), + exclude_custom_names(exclude_custom_names_) {} + + static const char * serverTypeToString(Type type); + + /// Checks whether provided in the arguments type should be started or stopped based on current server type. + bool shouldStart(Type server_type, const std::string & server_custom_name = "") const; + bool shouldStop(const std::string & port_name) const; + + Type type; + std::string custom_name; + + Types exclude_types; + CustomNames exclude_custom_names; +}; + +} diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 13a01ba8139..a33fbfbbf95 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -90,7 +90,7 @@ static inline void trySendExceptionToClient( void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10); + auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); const auto & out = responseWriteBuffer(request, response, keep_alive_timeout); try diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a747f06f1ce..c687a6064b4 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,22 @@ namespace ProfileEvents extern const Event MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds; } +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int CLIENT_HAS_CONNECTED_TO_WRONG_PORT; + extern const int UNKNOWN_EXCEPTION; + extern const int UNKNOWN_PACKET_FROM_CLIENT; + extern const int POCO_EXCEPTION; + extern const int SOCKET_TIMEOUT; + extern const int UNEXPECTED_PACKET_FROM_CLIENT; + extern const int UNKNOWN_PROTOCOL; + extern const int AUTHENTICATION_FAILED; + extern const int QUERY_WAS_CANCELLED; + extern const int CLIENT_INFO_DOES_NOT_MATCH; +} + namespace { NameToNameMap convertToQueryParameters(const Settings & passed_params) @@ -98,26 +115,70 @@ NameToNameMap convertToQueryParameters(const Settings & passed_params) return query_parameters; } +// This function corrects the wrong client_name from the old client. +// Old clients 28.7 and some intermediate versions of 28.7 were sending different ClientInfo.client_name +// "ClickHouse client" was sent with the hello message. +// "ClickHouse" or "ClickHouse " was sent with the query message. 
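The `ServerType` class above decides which listeners a start/stop request affects: `shouldStop` maps a config port key to a `Type` (extracting the custom protocol name for `protocols.<name>.port` keys) and then defers to `shouldStart`. A small standalone sketch of that port-name mapping, with a trimmed-down enum instead of the full class:

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>

enum class ToyType { HTTP, TCP, MYSQL, CUSTOM, UNKNOWN };

struct Mapped { ToyType type; std::string custom_name; };

// Maps a config port key to the listener it controls, mirroring the idea of shouldStop().
Mapped mapPortName(const std::string & port_name)
{
    if (port_name == "http_port")
        return {ToyType::HTTP, ""};
    if (port_name == "tcp_port")
        return {ToyType::TCP, ""};
    if (port_name == "mysql_port")
        return {ToyType::MYSQL, ""};
    if (port_name.starts_with("protocols.") && port_name.ends_with(".port"))
    {
        constexpr size_t prefix = std::string_view("protocols.").size();
        constexpr size_t suffix = std::string_view(".port").size();
        return {ToyType::CUSTOM, port_name.substr(prefix, port_name.size() - prefix - suffix)};
    }
    return {ToyType::UNKNOWN, ""};
}

int main()
{
    auto mapped = mapPortName("protocols.mysql_secure.port");
    std::cout << "custom protocol name: " << mapped.custom_name << '\n';   // mysql_secure

    auto plain = mapPortName("http_port");
    std::cout << std::boolalpha
              << "http maps to a default listener: " << (plain.type == ToyType::HTTP) << '\n';
}
```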
+void correctQueryClientInfo(const ClientInfo & session_client_info, ClientInfo & client_info) +{ + if (client_info.getVersionNumber() <= VersionNumber(23, 8, 1) && + session_client_info.client_name == "ClickHouse client" && + (client_info.client_name == "ClickHouse" || client_info.client_name == "ClickHouse ")) + { + client_info.client_name = "ClickHouse client"; + } +} + +void validateClientInfo(const ClientInfo & session_client_info, const ClientInfo & client_info) +{ + // Secondary query may contain different client_info. + // In the case of select from distributed table or 'select * from remote' from non-tcp handler. Server sends the initial client_info data. + // + // Example 1: curl -q -s --max-time 60 -sS "http://127.0.0.1:8123/?" -d "SELECT 1 FROM remote('127.0.0.1', system.one)" + // HTTP handler initiates TCP connection with remote 127.0.0.1 (session on remote 127.0.0.1 use TCP interface) + // HTTP handler sends client_info with HTTP interface and HTTP data by TCP protocol in Protocol::Client::Query message. + // + // Example 2: select * from --host shard_1 // distributed table has 2 shards: shard_1, shard_2 + // shard_1 receives a message with 'ClickHouse client' client_name + // shard_1 initiates TCP connection with shard_2 with 'ClickHouse server' client_name. + // shard_1 sends 'ClickHouse client' client_name in Protocol::Client::Query message to shard_2. + if (client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + return; + + if (session_client_info.interface != client_info.interface) + { + throw Exception( + DB::ErrorCodes::CLIENT_INFO_DOES_NOT_MATCH, + "Client info's interface does not match: {} not equal to {}", + toString(session_client_info.interface), + toString(client_info.interface)); + } + + if (session_client_info.interface == ClientInfo::Interface::TCP) + { + if (session_client_info.client_name != client_info.client_name) + throw Exception( + DB::ErrorCodes::CLIENT_INFO_DOES_NOT_MATCH, + "Client info's client_name does not match: {} not equal to {}", + session_client_info.client_name, + client_info.client_name); + + // TCP handler got patch version 0 always for backward compatibility. + if (!session_client_info.clientVersionEquals(client_info, false)) + throw Exception( + DB::ErrorCodes::CLIENT_INFO_DOES_NOT_MATCH, + "Client info's version does not match: {} not equal to {}", + session_client_info.getVersionStr(), + client_info.getVersionStr()); + + // os_user, quota_key, client_trace_context can be different. + } +} } namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int CLIENT_HAS_CONNECTED_TO_WRONG_PORT; - extern const int UNKNOWN_EXCEPTION; - extern const int UNKNOWN_PACKET_FROM_CLIENT; - extern const int POCO_EXCEPTION; - extern const int SOCKET_TIMEOUT; - extern const int UNEXPECTED_PACKET_FROM_CLIENT; - extern const int UNKNOWN_PROTOCOL; - extern const int AUTHENTICATION_FAILED; - extern const int QUERY_WAS_CANCELLED; -} - TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) : Poco::Net::TCPServerConnection(socket_) , server(server_) @@ -184,14 +245,17 @@ void TCPHandler::runImpl() try { receiveHello(); + + /// In interserver mode queries are executed without a session context. 
+ if (!is_interserver_mode) + session->makeSessionContext(); + sendHello(); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); - if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) { - session->makeSessionContext(); - /// If session created, then settings in session context has been updated. /// So it's better to update the connection settings for flexibility. extractConnectionSettingsFromContext(session->sessionContext()); @@ -260,6 +324,17 @@ void TCPHandler::runImpl() std::unique_ptr exception; bool network_error = false; bool query_duration_already_logged = false; + auto log_query_duration = [this, &query_duration_already_logged]() + { + if (query_duration_already_logged) + return; + query_duration_already_logged = true; + auto elapsed_sec = state.watch.elapsedSeconds(); + /// We already logged more detailed info if we read some rows + if (elapsed_sec < 1.0 && state.progress.read_rows) + return; + LOG_DEBUG(log, "Processed in {} sec.", elapsed_sec); + }; try { @@ -396,7 +471,7 @@ void TCPHandler::runImpl() if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return; - sendMergeTreeAllRangesAnnounecementAssumeLocked(announcement); + sendMergeTreeAllRangesAnnouncementAssumeLocked(announcement); ProfileEvents::increment(ProfileEvents::MergeTreeAllRangesAnnouncementsSent); ProfileEvents::increment(ProfileEvents::MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds, watch.elapsedMicroseconds()); }); @@ -489,9 +564,7 @@ void TCPHandler::runImpl() /// Do it before sending end of stream, to have a chance to show log message in client. query_scope->logPeakMemoryUsage(); - - LOG_DEBUG(log, "Processed in {} sec.", state.watch.elapsedSeconds()); - query_duration_already_logged = true; + log_query_duration(); if (state.is_connection_closed) break; @@ -613,10 +686,7 @@ void TCPHandler::runImpl() LOG_WARNING(log, "Can't skip data packets after query failure."); } - if (!query_duration_already_logged) - { - LOG_DEBUG(log, "Processed in {} sec.", state.watch.elapsedSeconds()); - } + log_query_duration(); /// QueryState should be cleared before QueryScope, since otherwise /// the MemoryTracker will be wrong for possible deallocations. 
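The `log_query_duration` lambda added in the `TCPHandler::runImpl` hunk above exists so that the "Processed in … sec." message is emitted exactly once, whether the query finishes normally or fails after partial progress. A minimal standalone sketch of the same call-once pattern follows (plain C++, not ClickHouse code; the `log_once` name and `printf` logging are illustrative stand-ins):

```
// Sketch of the call-once logging pattern used by log_query_duration above:
// a flag captured by reference guards the message, so both the success path
// and the error/cleanup path can call the helper without producing duplicates.
#include <cstdio>

int main()
{
    bool already_logged = false;
    auto log_once = [&already_logged](double elapsed_sec)
    {
        if (already_logged)
            return;
        already_logged = true;
        std::printf("Processed in %.3f sec.\n", elapsed_sec);
    };

    log_once(0.42); // normal completion path
    log_once(0.42); // error/cleanup path: prints nothing the second time
    return 0;
}
```

Capturing the flag by reference is what lets the two call sites in the hunk above share one guard instead of repeating the `query_duration_already_logged` check inline.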
@@ -815,7 +885,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors() std::unique_lock progress_lock(task_callback_mutex, std::defer_lock); { - PullingAsyncPipelineExecutor executor(pipeline); + bool has_partial_result_setting = query_context->getSettingsRef().partial_result_update_duration_ms.totalMilliseconds() > 0; + PullingAsyncPipelineExecutor executor(pipeline, has_partial_result_setting); CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread}; Block block; @@ -974,9 +1045,9 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() } -void TCPHandler::sendMergeTreeAllRangesAnnounecementAssumeLocked(InitialAllRangesAnnouncement announcement) +void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRangesAnnouncement announcement) { - writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnounecement, *out); + writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); announcement.serialize(*out); out->next(); } @@ -1181,7 +1252,6 @@ std::unique_ptr TCPHandler::makeSession() res->setClientName(client_name); res->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); res->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); - res->setQuotaClientKey(quota_key); res->setClientInterface(interface); return res; @@ -1274,11 +1344,10 @@ void TCPHandler::receiveHello() void TCPHandler::receiveAddendum() { if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) - { readStringBinary(quota_key, *in); - if (!is_interserver_mode) - session->setQuotaClientKey(quota_key); - } + + if (!is_interserver_mode) + session->setQuotaClientKey(quota_key); } @@ -1302,16 +1371,16 @@ void TCPHandler::receiveUnexpectedHello() void TCPHandler::sendHello() { writeVarUInt(Protocol::Server::Hello, *out); - writeStringBinary(DBMS_NAME, *out); - writeVarUInt(DBMS_VERSION_MAJOR, *out); - writeVarUInt(DBMS_VERSION_MINOR, *out); + writeStringBinary(VERSION_NAME, *out); + writeVarUInt(VERSION_MAJOR, *out); + writeVarUInt(VERSION_MINOR, *out); writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) writeStringBinary(DateLUT::instance().getTimeZone(), *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME) writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) - writeVarUInt(DBMS_VERSION_PATCH, *out); + writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); @@ -1477,8 +1546,15 @@ void TCPHandler::receiveQuery() /// Read client info. ClientInfo client_info = session->getClientInfo(); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_CLIENT_INFO) + { client_info.read(*in, client_tcp_protocol_version); + correctQueryClientInfo(session->getClientInfo(), client_info); + const auto & config_ref = Context::getGlobalContextInstance()->getServerSettings(); + if (config_ref.validate_tcp_client_information) + validateClientInfo(session->getClientInfo(), client_info); + } + /// Per query settings are also passed via TCP. /// We need to check them before applying due to they can violate the settings constraints. 
auto settings_format = (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS) @@ -1591,12 +1667,12 @@ void TCPHandler::receiveQuery() if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { /// Throw an exception if the passed settings violate the constraints. - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); } else { /// Quietly clamp to the constraints if it's not an initial query. - query_context->clampToSettingsConstraints(settings_changes); + query_context->clampToSettingsConstraints(settings_changes, SettingSource::QUERY); } query_context->applySettingsChanges(settings_changes); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 235f634afec..cfb17ce6ae6 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -264,7 +264,7 @@ private: void sendEndOfStream(); void sendPartUUIDs(); void sendReadTaskRequestAssumeLocked(); - void sendMergeTreeAllRangesAnnounecementAssumeLocked(InitialAllRangesAnnouncement announcement); + void sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRangesAnnouncement announcement); void sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest request); void sendProfileInfo(const ProfileInfo & info); void sendTotals(const Block & totals); diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 3997e0f19b6..fdba03baa73 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -6,10 +6,18 @@ #include #include -#include #include +#include + +#include "config.h" + +/// Embedded HTML pages +INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); +INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); +INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); + namespace DB { @@ -22,7 +30,7 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_) void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10); + auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); response.setContentType("text/html; charset=UTF-8"); @@ -34,13 +42,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR if (request.getURI().starts_with("/play")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("play.html"); + *response.send() << std::string_view(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); } else if (request.getURI().starts_with("/dashboard")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - std::string html(getResource("dashboard.html")); + std::string html(reinterpret_cast(gresource_dashboard_htmlData), gresource_dashboard_htmlSize); /// Replace a link to external JavaScript file to embedded file. /// This allows to open the HTML without running a server and to host it on server. 
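The `WebUIRequestHandler` hunks above drop the old `getResource()` lookup in favour of pages embedded at compile time with `INCBIN`, which are then served as `std::string_view`s over the generated `g<name>Data`/`g<name>Size` symbols. A small sketch of that pattern, assuming the `incbin.h` header is on the include path and that a `page.html` file is resolvable at build time (both are assumptions made only for this illustration):

```
// Hypothetical example of embedding a static page with INCBIN (assumes incbin.h
// is available and page.html can be found at build time). INCBIN(page_html, ...)
// generates gpage_htmlData and gpage_htmlSize, mirroring the
// gresource_play_htmlData / gresource_play_htmlSize symbols in the diff above.
#include <cstdio>
#include <string_view>
#include <incbin.h>

INCBIN(page_html, "page.html");

int main()
{
    std::string_view page(reinterpret_cast<const char *>(gpage_htmlData), gpage_htmlSize);
    std::printf("embedded page is %zu bytes\n", page.size());
    return 0;
}
```

Embedding the files removes the runtime dependency on a separate resource bundle, which is why the handler can stream the bytes straight into `response.send()`.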
@@ -55,7 +63,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("js/uplot.js"); + *response.send() << std::string_view(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } else { diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index a9247f9b898..f38fc1f3734 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1077,9 +1077,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column. Cannot find column {} to modify", backQuote(column_name)); - all_columns.appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Wrong column. Cannot find column {} to modify{}", + backQuote(column_name), all_columns.getHintsMessage(column_name)); } else continue; @@ -1351,9 +1350,14 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const validateColumnsDefaultsAndGetSampleBlock(default_expr_list, all_columns.getAll(), context); } -bool AlterCommands::hasSettingsAlterCommand() const +bool AlterCommands::hasNonReplicatedAlterCommand() const { - return std::any_of(begin(), end(), [](const AlterCommand & c) { return c.isSettingsAlter(); }); + return std::any_of(begin(), end(), [](const AlterCommand & c) { return c.isSettingsAlter() || c.isCommentAlter(); }); +} + +bool AlterCommands::areNonReplicatedAlterCommands() const +{ + return std::all_of(begin(), end(), [](const AlterCommand & c) { return c.isSettingsAlter() || c.isCommentAlter(); }); } bool AlterCommands::isSettingsAlter() const diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 3e526dcc0bb..c06872f9757 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -196,8 +196,11 @@ public: /// Commands have to be prepared before apply. void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const; - /// At least one command modify settings. - bool hasSettingsAlterCommand() const; + /// At least one command modify settings or comments. + bool hasNonReplicatedAlterCommand() const; + + /// All commands modify settings or comments. + bool areNonReplicatedAlterCommands() const; /// All commands modify settings only. 
bool isSettingsAlter() const; diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index 1fc68a2d774..c0277d0cbbb 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -57,7 +57,7 @@ LocalFileHolder::~LocalFileHolder() { if (original_readbuffer) { - dynamic_cast(original_readbuffer.get())->seek(0, SEEK_SET); + assert_cast(original_readbuffer.get())->seek(0, SEEK_SET); file_cache_controller->value().startBackgroundDownload(std::move(original_readbuffer), *thread_pool); } } @@ -122,7 +122,7 @@ off_t RemoteReadBuffer::seek(off_t offset, int whence) { if (local_file_holder->original_readbuffer) { - auto ret = dynamic_cast(local_file_holder->original_readbuffer.get())->seek(offset, whence); + auto ret = assert_cast(local_file_holder->original_readbuffer.get())->seek(offset, whence); BufferBase::set( local_file_holder->original_readbuffer->buffer().begin(), local_file_holder->original_readbuffer->buffer().size(), @@ -147,7 +147,7 @@ off_t RemoteReadBuffer::getPosition() { if (local_file_holder->original_readbuffer) { - return dynamic_cast(local_file_holder->original_readbuffer.get())->getPosition(); + return assert_cast(local_file_holder->original_readbuffer.get())->getPosition(); } return local_file_holder->file_buffer->getPosition(); } diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 22b6921f6c1..299dd292772 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -5,7 +5,11 @@ namespace ProfileEvents { extern const Event SchemaInferenceCacheHits; + extern const Event SchemaInferenceCacheSchemaHits; + extern const Event SchemaInferenceCacheNumRowsHits; extern const Event SchemaInferenceCacheMisses; + extern const Event SchemaInferenceCacheSchemaMisses; + extern const Event SchemaInferenceCacheNumRowsMisses; extern const Event SchemaInferenceCacheEvictions; extern const Event SchemaInferenceCacheInvalidations; } @@ -17,29 +21,41 @@ SchemaCache::SchemaCache(size_t max_elements_) : max_elements(max_elements_) { } -void SchemaCache::add(const Key & key, const ColumnsDescription & columns) +void SchemaCache::addColumns(const Key & key, const ColumnsDescription & columns) { std::lock_guard lock(mutex); - addUnlocked(key, columns); + addUnlocked(key, columns, std::nullopt); } -void SchemaCache::addMany(const Keys & keys, const ColumnsDescription & columns) +void SchemaCache::addManyColumns(const Keys & keys, const ColumnsDescription & columns) { std::lock_guard lock(mutex); for (const auto & key : keys) - addUnlocked(key, columns); + addUnlocked(key, columns, std::nullopt); } -void SchemaCache::addUnlocked(const Key & key, const ColumnsDescription & columns) +void SchemaCache::addNumRows(const DB::SchemaCache::Key & key, size_t num_rows) { - /// Do nothing if this key is already in cache; - if (data.contains(key)) + std::lock_guard lock(mutex); + addUnlocked(key, std::nullopt, num_rows); +} + +void SchemaCache::addUnlocked(const Key & key, const std::optional & columns, std::optional num_rows) +{ + /// Update columns/num_rows with new values if this key is already in cache. 
+ if (auto it = data.find(key); it != data.end()) + { + if (columns) + it->second.schema_info.columns = columns; + if (num_rows) + it->second.schema_info.num_rows = num_rows; return; + } time_t now = std::time(nullptr); auto it = queue.insert(queue.end(), key); - data[key] = {SchemaInfo{columns, now}, it}; + data[key] = {SchemaInfo{columns, num_rows, now}, it}; checkOverflow(); } @@ -54,7 +70,35 @@ void SchemaCache::checkOverflow() ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheEvictions); } -std::optional SchemaCache::tryGet(const Key & key, LastModificationTimeGetter get_last_mod_time) +std::optional SchemaCache::tryGetColumns(const DB::SchemaCache::Key & key, DB::SchemaCache::LastModificationTimeGetter get_last_mod_time) +{ + auto schema_info = tryGetImpl(key, get_last_mod_time); + if (!schema_info) + return std::nullopt; + + if (schema_info->columns) + ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheSchemaHits); + else + ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheSchemaMisses); + + return schema_info->columns; +} + +std::optional SchemaCache::tryGetNumRows(const DB::SchemaCache::Key & key, DB::SchemaCache::LastModificationTimeGetter get_last_mod_time) +{ + auto schema_info = tryGetImpl(key, get_last_mod_time); + if (!schema_info) + return std::nullopt; + + if (schema_info->num_rows) + ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheNumRowsHits); + else + ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheNumRowsMisses); + + return schema_info->num_rows; +} + +std::optional SchemaCache::tryGetImpl(const Key & key, LastModificationTimeGetter get_last_mod_time) { std::lock_guard lock(mutex); auto it = data.find(key); @@ -64,6 +108,8 @@ std::optional SchemaCache::tryGet(const Key & key, LastModif return std::nullopt; } + ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheHits); + auto & schema_info = it->second.schema_info; auto & queue_iterator = it->second.iterator; if (get_last_mod_time) @@ -89,9 +135,7 @@ std::optional SchemaCache::tryGet(const Key & key, LastModif /// Move key to the end of queue. queue.splice(queue.end(), queue, queue_iterator); - - ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheHits); - return schema_info.columns; + return schema_info; } void SchemaCache::clear() diff --git a/src/Storages/Cache/SchemaCache.h b/src/Storages/Cache/SchemaCache.h index deec32df49e..6f1ce917852 100644 --- a/src/Storages/Cache/SchemaCache.h +++ b/src/Storages/Cache/SchemaCache.h @@ -11,7 +11,8 @@ namespace DB const size_t DEFAULT_SCHEMA_CACHE_ELEMENTS = 4096; -/// Cache that stores columns description by some string key. It's used in schema inference. +/// Cache that stores columns description and/or number of rows by some string key. +/// It's used in schema inference and fast count from format file. /// It implements LRU semantic: after each access to a key in cache we move this key to /// the end of the queue, if we reached the limit of maximum elements in the cache we /// remove keys from the beginning of the queue. 
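The header comment above summarizes the cache's LRU policy: each lookup moves the key to the end of the queue, and overflowing `max_elements` evicts keys from the front. The toy sketch below mirrors that bookkeeping with a `std::list` plus an `unordered_map` of iterators; `TinyLRU` is a hypothetical name, and it stores only a row count per key (unlike the real `SchemaCache`) to keep the example short:

```
// Toy LRU keyed by string, storing a row count per key. Not the SchemaCache API;
// it only demonstrates the queue/map bookkeeping described in the comment above.
#include <cassert>
#include <list>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>

class TinyLRU
{
public:
    explicit TinyLRU(size_t max_elements_) : max_elements(max_elements_) {}

    void add(const std::string & key, size_t num_rows)
    {
        if (auto it = data.find(key); it != data.end())
        {
            it->second.first = num_rows;      // update value and refresh recency
            touch(it->second.second);
            return;
        }
        queue.push_back(key);
        data.emplace(key, std::make_pair(num_rows, std::prev(queue.end())));
        if (data.size() > max_elements)
        {
            data.erase(queue.front());        // evict the least recently used key
            queue.pop_front();
        }
    }

    std::optional<size_t> tryGet(const std::string & key)
    {
        auto it = data.find(key);
        if (it == data.end())
            return std::nullopt;
        touch(it->second.second);             // move the key to the end of the queue
        return it->second.first;
    }

private:
    void touch(std::list<std::string>::iterator it) { queue.splice(queue.end(), queue, it); }

    size_t max_elements;
    std::list<std::string> queue;
    std::unordered_map<std::string, std::pair<size_t, std::list<std::string>::iterator>> data;
};

int main()
{
    TinyLRU cache(/*max_elements_=*/ 2);
    cache.add("file_a.csv", 100);
    cache.add("file_b.csv", 200);
    assert(cache.tryGet("file_a.csv").value() == 100);  // refreshes file_a.csv
    cache.add("file_c.csv", 300);                       // evicts file_b.csv, the oldest key
    assert(!cache.tryGet("file_b.csv"));
    return 0;
}
```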
@@ -47,26 +48,31 @@ public: struct SchemaInfo { - ColumnsDescription columns; + std::optional columns; + std::optional num_rows; time_t registration_time; }; using LastModificationTimeGetter = std::function()>; - /// Add new key with a schema - void add(const Key & key, const ColumnsDescription & columns); + /// Add new key or update existing with a schema + void addColumns(const Key & key, const ColumnsDescription & columns); + /// Add/update many keys with the same schema (usually used for globs) + void addManyColumns(const Keys & keys, const ColumnsDescription & columns); - /// Add many keys with the same schema (usually used for globs) - void addMany(const Keys & keys, const ColumnsDescription & columns); + /// Add new key or update existing with number of rows + void addNumRows(const Key & key, size_t num_rows); - std::optional tryGet(const Key & key, LastModificationTimeGetter get_last_mod_time = {}); + std::optional tryGetColumns(const Key & key, LastModificationTimeGetter get_last_mod_time = {}); + std::optional tryGetNumRows(const Key & key, LastModificationTimeGetter get_last_mod_time = {}); void clear(); std::unordered_map getAll(); private: - void addUnlocked(const Key & key, const ColumnsDescription & columns); + void addUnlocked(const Key & key, const std::optional & columns, std::optional num_rows); + std::optional tryGetImpl(const Key & key, LastModificationTimeGetter get_last_mod_time); void checkOverflow(); using Queue = std::list; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 045afd7e6e6..0c918bda5fd 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -232,9 +232,7 @@ void ColumnsDescription::remove(const String & column_name) auto range = getNameRange(columns, column_name); if (range.first == range.second) { - String exception_message = fmt::format("There is no column {} in table", column_name); - appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table{}", column_name, getHintsMessage(column_name)); } for (auto list_it = range.first; list_it != range.second;) @@ -249,9 +247,8 @@ void ColumnsDescription::rename(const String & column_from, const String & colum auto it = columns.get<1>().find(column_from); if (it == columns.get<1>().end()) { - String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_from); - appendHintsMessage(exception_message, column_from); - throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find column {} in ColumnsDescription{}", + column_from, getHintsMessage(column_from)); } columns.get<1>().modify_key(it, [&column_to] (String & old_name) diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 365a999673e..ee0bb5efb66 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -98,7 +98,7 @@ struct ColumnDescription /// Description of multiple table columns (in CREATE TABLE for example). 
-class ColumnsDescription : public IHints<1, ColumnsDescription> +class ColumnsDescription : public IHints<> { public: ColumnsDescription() = default; @@ -160,9 +160,8 @@ public: auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) { - String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_name); - appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find column {} in ColumnsDescription{}", + column_name, getHintsMessage(column_name)); } removeSubcolumns(it->name); diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 309aa54909a..1172a40627d 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -281,7 +281,6 @@ struct DeltaLakeMetadataParser::Impl ArrowColumnToCHColumn column_reader( header, "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, /* null_as_default */true, /* case_insensitive_column_matching */false); diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index f62c440bc2f..ac472c190e4 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -57,8 +57,8 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( { outcome = client->ListObjectsV2(request); if (!outcome.IsSuccess()) - throw Exception( - ErrorCodes::S3_ERROR, + throw S3Exception( + outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", quoteString(bucket), quoteString(base_configuration.url.key), diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d8d9a0c9d1e..35199ec1f84 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -186,6 +186,15 @@ void DistributedAsyncInsertDirectoryQueue::shutdownAndDropAllData() fs::remove_all(path); } +void DistributedAsyncInsertDirectoryQueue::shutdownWithoutFlush() +{ + /// It's incompatible with should_batch_inserts + /// because processFilesWithBatching may push to the queue after shutdown + chassert(!should_batch_inserts); + pending_files.finish(); + task_handle->deactivate(); +} + void DistributedAsyncInsertDirectoryQueue::run() { @@ -401,7 +410,7 @@ try if (!current_file.empty()) processFile(current_file); - while (pending_files.tryPop(current_file)) + while (!pending_files.isFinished() && pending_files.tryPop(current_file)) processFile(current_file); } @@ -419,7 +428,7 @@ catch (...) 
throw; } -void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path) { OpenTelemetry::TracingContextHolderPtr thread_trace_context; @@ -459,7 +468,7 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ if (isDistributedSendBroken(e.code(), e.isRemoteException())) { markAsBroken(file_path); - current_file.clear(); + file_path.clear(); } throw; } @@ -473,8 +482,8 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ auto dir_sync_guard = getDirectorySyncGuard(relative_path); markAsSend(file_path); - current_file.clear(); LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); + file_path.clear(); } struct DistributedAsyncInsertDirectoryQueue::BatchHeader diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 9a8a235e265..6378479761d 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -66,6 +66,8 @@ public: void shutdownAndDropAllData(); + void shutdownWithoutFlush(); + static std::shared_ptr createSourceFromFile(const String & file_name); /// For scheduling via DistributedSink. @@ -100,7 +102,7 @@ private: void addFile(const std::string & file_path); void initializeFilesFromDisk(); void processFiles(); - void processFile(const std::string & file_path); + void processFile(std::string & file_path); void processFilesWithBatching(); void markAsBroken(const std::string & file_path); diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h index 86b77707dee..85e64cabfd7 100644 --- a/src/Storages/Distributed/DistributedSettings.h +++ b/src/Storages/Distributed/DistributedSettings.h @@ -26,6 +26,7 @@ class ASTStorage; M(UInt64, monitor_split_batch_on_failure, 0, "Default - distributed_directory_monitor_split_batch_on_failure", 0) \ M(Milliseconds, monitor_sleep_time_ms, 0, "Default - distributed_directory_monitor_sleep_time_ms", 0) \ M(Milliseconds, monitor_max_sleep_time_ms, 0, "Default - distributed_directory_monitor_max_sleep_time_ms", 0) \ + M(Bool, flush_on_detach, true, "Flush data to remote nodes on DETACH/DROP/server shutdown", 0) \ DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) diff --git a/src/Storages/ExecutableSettings.h b/src/Storages/ExecutableSettings.h index 9374dac461e..10dbae8ac9f 100644 --- a/src/Storages/ExecutableSettings.h +++ b/src/Storages/ExecutableSettings.h @@ -14,7 +14,9 @@ class ASTStorage; M(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \ M(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \ M(UInt64, command_read_timeout, 10000, "Timeout for reading data from command stdout in milliseconds.", 0) \ - M(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) + M(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) \ + M(ExternalCommandStderrReaction, stderr_reaction, ExternalCommandStderrReaction::NONE, "Reaction when external command outputs data to its stderr.", 0) \ + M(Bool, check_exit_code, false, "Throw exception if the command exited with non-zero status code.", 0) \ DECLARE_SETTINGS_TRAITS(ExecutableSettingsTraits, 
LIST_OF_EXECUTABLE_SETTINGS) diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index e503c5edaab..4454fdd9cbd 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -130,7 +130,7 @@ std::optional getExternalDataSourceConfiguration( "Named collection of connection parameters is missing some " "of the parameters and dictionary parameters are not added"); } - return ExternalDataSourceInfo{ .configuration = configuration, .specific_args = {}, .settings_changes = config_settings }; + return ExternalDataSourceInfo{.configuration = configuration, .settings_changes = config_settings}; } return std::nullopt; } diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index b825548debe..d4e737a7de1 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -39,7 +39,6 @@ using StorageSpecificArgs = std::vector>; struct ExternalDataSourceInfo { ExternalDataSourceConfiguration configuration; - StorageSpecificArgs specific_args; SettingsChanges settings_changes; }; @@ -85,7 +84,6 @@ struct URLBasedDataSourceConfiguration struct URLBasedDataSourceConfig { URLBasedDataSourceConfiguration configuration; - StorageSpecificArgs specific_args; }; std::optional getURLBasedDataSourceConfiguration( diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index dae6f6a7ca9..5faccefd836 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -719,6 +719,7 @@ bool StorageFileLog::streamToViews() { block_io.pipeline.complete(std::move(input)); block_io.pipeline.setNumThreads(max_streams_number); + block_io.pipeline.setConcurrencyControl(new_context->getSettingsRef().use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); executor.execute(); diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index d2e19551c92..b9642ec7907 100644 --- a/src/Storages/Freeze.cpp +++ b/src/Storages/Freeze.cpp @@ -232,6 +232,7 @@ PartitionCommandsResultInfo Unfreezer::unfreezePartitionsFromTableDirectory(Merg bool keep_shared = removeFreezedPart(disk, path, partition_directory, local_context, zookeeper); result.push_back(PartitionCommandResultInfo{ + .command_type = "UNFREEZE PART", .partition_id = partition_id, .part_name = partition_directory, .backup_path = disk->getPath() + table_directory.generic_string(), @@ -239,11 +240,11 @@ PartitionCommandsResultInfo Unfreezer::unfreezePartitionsFromTableDirectory(Merg .backup_name = backup_name, }); - LOG_DEBUG(log, "Unfreezed part by path {}, keep shared data: {}", disk->getPath() + path, keep_shared); + LOG_DEBUG(log, "Unfrozen part by path {}, keep shared data: {}", disk->getPath() + path, keep_shared); } } - LOG_DEBUG(log, "Unfreezed {} parts", result.size()); + LOG_DEBUG(log, "Unfrozen {} parts", result.size()); return result; } diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 7b149518c0a..6695a5baa09 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -145,10 +145,7 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A hdfsBuilderSetNameNodePort(builder.get(), port); } - if (config.has(std::string(CONFIG_PREFIX))) - { - 
builder.loadFromConfig(config, std::string(CONFIG_PREFIX)); - } + builder.loadFromConfig(config, std::string(CONFIG_PREFIX)); if (!user.empty()) { diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index 4588480602a..23f9e4d8f12 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -57,7 +57,23 @@ public: ~HDFSBuilderWrapper() { hdfsFreeBuilder(hdfs_builder); } HDFSBuilderWrapper(const HDFSBuilderWrapper &) = delete; - HDFSBuilderWrapper(HDFSBuilderWrapper &&) = default; + HDFSBuilderWrapper & operator=(const HDFSBuilderWrapper &) = delete; + + HDFSBuilderWrapper(HDFSBuilderWrapper && other) noexcept + { + *this = std::move(other); + } + + HDFSBuilderWrapper & operator=(HDFSBuilderWrapper && other) noexcept + { + std::swap(hdfs_builder, other.hdfs_builder); + config_stor = std::move(other.config_stor); + hadoop_kerberos_keytab = std::move(other.hadoop_kerberos_keytab); + hadoop_kerberos_principal = std::move(other.hadoop_kerberos_principal); + hadoop_security_kerberos_ticket_cache_path = std::move(other.hadoop_security_kerberos_ticket_cache_path); + need_kinit = std::move(other.need_kinit); + return *this; + } hdfsBuilder * get() { return hdfs_builder; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 483f0894cc4..ca84719b793 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include #include +#include #include #include #include @@ -41,6 +42,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory file_size_) : BufferWithOwnMemory(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) @@ -63,6 +66,22 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); + hdfsFreeFileInfo(file_info, 1); + } } ~ReadBufferFromHDFSImpl() override @@ -72,10 +91,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemorymSize; + return file_size; } bool nextImpl() override @@ -95,6 +111,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) + { + return false; + } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; @@ -156,10 +176,11 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_, - bool use_external_buffer_) + bool use_external_buffer_, + std::optional file_size_) : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0) , impl(std::make_unique( - hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_)) + hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_, file_size_)) , use_external_buffer(use_external_buffer_) { } @@ -229,20 +250,6 @@ size_t ReadBufferFromHDFS::getFileOffsetOfBufferEnd() const return impl->getPosition(); } -IAsynchronousReader::Result ReadBufferFromHDFS::readInto(char * data, size_t size, size_t offset, size_t /*ignore*/) -{ - /// TODO: we don't need to copy if there is no pending data - seek(offset, SEEK_SET); - if (eof()) - return {0, 0, nullptr}; - - /// Make sure returned size no greater than available bytes in working_buffer - size_t count = std::min(size, available()); - memcpy(data, position(), count); - position() += count; - return {count, 0, 
nullptr}; -} - String ReadBufferFromHDFS::getFileName() const { return impl->hdfs_file_path; diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index 6aed3ddff26..d9671e7e445 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -29,7 +29,8 @@ public: const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_ = 0, - bool use_external_buffer = false); + bool use_external_buffer = false, + std::optional file_size = std::nullopt); ~ReadBufferFromHDFS() override; @@ -43,8 +44,6 @@ public: size_t getFileOffsetOfBufferEnd() const override; - IAsynchronousReader::Result readInto(char * data, size_t size, size_t offset, size_t ignore) override; - String getFileName() const override; private: diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 3d7e2b05f5a..7d144814803 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -28,9 +30,8 @@ #include #include #include -#include +#include #include -#include #include #include @@ -50,6 +51,11 @@ namespace fs = std::filesystem; +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} + namespace DB { namespace ErrorCodes @@ -115,9 +121,9 @@ namespace { if (next_slash_after_glob_pos == std::string::npos) { - result.emplace_back( + result.emplace_back(StorageHDFS::PathWithInfo{ String(ls.file_info[i].mName), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}); + StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else { @@ -202,7 +208,7 @@ namespace throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); for (int i = 0; i < ls.length; ++i) { - const String full_path = String(ls.file_info[i].mName); + const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; @@ -212,7 +218,7 @@ namespace { if (re2::RE2::FullMatch(file_name, matcher)) result.push_back(StorageHDFS::PathWithInfo{ - String(ls.file_info[i].mName), + String(full_path), StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else if (is_directory && looking_for_directory) @@ -247,7 +253,8 @@ namespace HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); - return LSWithRegexpMatching("/", fs, path_from_uri); + auto res = LSWithRegexpMatching("/", fs, path_from_uri); + return res; } } @@ -291,12 +298,75 @@ StorageHDFS::StorageHDFS( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - auto default_virtuals = NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); +} - auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); - virtual_columns = getVirtualsForStorage(columns, default_virtuals); +namespace +{ + class ReadBufferIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferIterator( + const 
std::vector & paths_with_info_, + const String & uri_without_path_, + const String & format_, + const String & compression_method_, + const ContextPtr & context_) + : WithContext(context_) + , paths_with_info(paths_with_info_) + , uri_without_path(uri_without_path_) + , format(format_) + , compression_method(compression_method_) + { + } + + std::unique_ptr next() override + { + StorageHDFS::PathWithInfo path_with_info; + bool is_first = current_index == 0; + + while (true) + { + if (current_index == paths_with_info.size()) + { + if (is_first) + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. " + "You must specify table structure manually", format); + return nullptr; + } + + path_with_info = paths_with_info[current_index++]; + if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) + continue; + + auto compression = chooseCompressionMethod(path_with_info.path, compression_method); + auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) + { + const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; + return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); + } + } + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + return; + + String source = uri_without_path + paths_with_info[current_index - 1].path; + auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); + StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + private: + const std::vector & paths_with_info; + const String & uri_without_path; + const String & format; + const String & compression_method; + size_t current_index = 0; + }; } ColumnsDescription StorageHDFS::getTableStructureFromData( @@ -307,6 +377,7 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); auto paths_with_info = getPathsList(path_from_uri, uri, ctx); + if (paths_with_info.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, @@ -315,47 +386,21 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( std::optional columns_from_cache; if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs) - columns_from_cache = tryGetColumnsFromCache(paths_with_info, path_from_uri, format, ctx); - - ReadBufferIterator read_buffer_iterator - = [&, my_uri_without_path = uri_without_path, it = paths_with_info.begin(), first = true]( - ColumnsDescription &) mutable -> std::unique_ptr - { - PathWithInfo path_with_info; - while (true) - { - if (it == paths_with_info.end()) - { - if (first) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. 
" - "You must specify table structure manually", format); - return nullptr; - } - - path_with_info = *it++; - if (ctx->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(my_uri_without_path, path_with_info.path, ctx->getGlobalContext()->getConfigRef(), ctx->getReadSettings()); - if (!ctx->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; - first = false; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - } - } - }; + columns_from_cache = tryGetColumnsFromCache(paths_with_info, uri_without_path, format, ctx); ColumnsDescription columns; if (columns_from_cache) + { columns = *columns_from_cache; + } else + { + ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx); + } if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs) - addColumnsToCache(paths_with_info, path_from_uri, columns, format, ctx); + addColumnsToCache(paths_with_info, uri_without_path, columns, format, ctx); return columns; } @@ -363,12 +408,31 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( class HDFSSource::DisclosedGlobIterator::Impl { public: - Impl(ContextPtr context_, const String & uri) + Impl(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context) { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context_); + uris = getPathsList(path_from_uri, uri_without_path, context); + ASTPtr filter_ast; + if (!uris.empty()) + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, uris[0].path, context); + + if (filter_ast) + { + std::vector paths; + paths.reserve(uris.size()); + for (const auto & path_with_info : uris) + paths.push_back(path_with_info.path); + + VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, context, filter_ast); + } + auto file_progress_callback = context->getFileProgressCallback(); + for (auto & elem : uris) + { elem.path = uri_without_path + elem.path; + if (file_progress_callback && elem.info) + file_progress_callback(FileProgress(0, elem.info->size)); + } uris_iter = uris.begin(); } @@ -389,49 +453,80 @@ private: std::vector::iterator uris_iter; }; -class HDFSSource::URISIterator::Impl +class HDFSSource::URISIterator::Impl : WithContext { public: - explicit Impl(const std::vector & uris_, ContextPtr context) + explicit Impl(const std::vector & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) + : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]); - HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - for (const auto & uri : uris_) + ASTPtr filter_ast; + if (!uris.empty()) + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first, getContext()); + + if 
(filter_ast) { - path_and_uri = getPathFromUriAndUriWithoutPath(uri); - if (!hdfsExists(fs.get(), path_and_uri.first.c_str())) - uris.push_back(uri); + std::vector paths; + paths.reserve(uris.size()); + for (const auto & uri : uris) + paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); + + VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, getContext(), filter_ast); + } + + if (!uris.empty()) + { + auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); + builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); + fs = createHDFSFS(builder.get()); } - uris_iter = uris.begin(); } StorageHDFS::PathWithInfo next() { - std::lock_guard lock(mutex); - if (uris_iter == uris.end()) - return {"", {}}; - auto key = *uris_iter; - ++uris_iter; - return {key, {}}; + String uri; + hdfsFileInfo * hdfs_info; + do + { + size_t current_index = index.fetch_add(1); + if (current_index >= uris.size()) + return {"", {}}; + + uri = uris[current_index]; + auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); + hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str()); + } + /// Skip non-existed files. + while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); + + std::optional info; + if (hdfs_info) + { + info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; + if (file_progress_callback) + file_progress_callback(FileProgress(0, hdfs_info->mSize)); + } + + return {uri, info}; } private: - std::mutex mutex; + std::atomic_size_t index = 0; Strings uris; - Strings::iterator uris_iter; + HDFSBuilderWrapper builder; + HDFSFSPtr fs; + std::function file_progress_callback; }; -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) - : pimpl(std::make_shared(context_, uri)) {} +HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context) + : pimpl(std::make_shared(uri, query, virtual_columns, context)) {} StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() { return pimpl->next(); } -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, ContextPtr context) - : pimpl(std::make_shared(uris_, context)) +HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context) + : pimpl(std::make_shared(uris_, query, virtual_columns, context)) { } @@ -440,30 +535,25 @@ StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() return pimpl->next(); } -Block HDFSSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - HDFSSource::HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - ColumnsDescription columns_description_) - : ISource(getHeader(block_for_format_, requested_virtual_columns_)) + bool need_only_count_, + const SelectQueryInfo & query_info_) + : ISource(info.source_header, false) , WithContext(context_) , storage(std::move(storage_)) - , block_for_format(block_for_format_) - , 
requested_virtual_columns(requested_virtual_columns_) + , block_for_format(info.format_header) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , max_block_size(max_block_size_) , file_iterator(file_iterator_) - , columns_description(std::move(columns_description_)) + , columns_description(info.columns_description) + , need_only_count(need_only_count_) + , query_info(query_info_) { initialize(); } @@ -484,11 +574,25 @@ bool HDFSSource::initialize() current_path = path_with_info.path; const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); + std::optional file_size; + if (!path_with_info.info) + { + auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); + auto fs = createHDFSFS(builder.get()); + auto * hdfs_info = hdfsGetPathInfo(fs.get(), path_from_uri.c_str()); + if (hdfs_info) + path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; + } + + if (path_with_info.info) + file_size = path_with_info.info->size; + auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); if (!skip_empty_files || !impl->eof()) { + impl->setProgressCallback(getContext()); const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); break; @@ -497,27 +601,51 @@ bool HDFSSource::initialize() current_path = path_with_info.path; - if (path_with_info.info && path_with_info.info->size) - { - /// Adjust total_rows_approx_accumulated with new total size. - if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + path_with_info.info->size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += path_with_info.info->size; - } - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); - QueryPipelineBuilder builder; - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) + std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; + if (num_rows_from_cache) { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
+ auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); + builder.init(Pipe(source)); } + else + { + std::optional max_parsing_threads; + if (need_only_count) + max_parsing_threads = 1; + + input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); + input_format->setQueryInfo(query_info, getContext()); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *input_format, getContext()); + }); + } + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); return true; } @@ -540,37 +668,21 @@ Chunk HDFSSource::generate() Chunk chunk; if (reader->pull(chunk)) { - Columns columns = chunk.getColumns(); UInt64 num_rows = chunk.getNumRows(); - - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - - for (const auto & virtual_column : requested_virtual_columns) - { - if (virtual_column.name == "_path") - { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - columns.push_back(column->convertToFullColumnIfConst()); - } - else if (virtual_column.name == "_file") - { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - columns.push_back(column->convertToFullColumnIfConst()); - } - } - - return Chunk(std::move(columns), num_rows); + total_rows_in_file += num_rows; + size_t chunk_size = 0; + if (input_format) + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, current_path); + return chunk; } + if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(current_path, total_rows_in_file); + + total_rows_in_file = 0; + reader.reset(); pipeline.reset(); input_format.reset(); @@ -582,6 +694,24 @@ Chunk HDFSSource::generate() return {}; } +void HDFSSource::addNumRowsToCache(const DB::String & path, size_t num_rows) +{ + auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); + StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); +} + +std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) +{ + auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); + auto get_last_mod_time = [&]() -> std::optional + { + if (path_with_info.info) + return path_with_info.info->last_mod_time; + return std::nullopt; + }; + + return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); +} class HDFSSink : public SinkToStorage { @@ -708,15 +838,15 @@ private: }; -bool StorageHDFS::supportsSubsetOfColumns() const +bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); } Pipe StorageHDFS::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -733,7 +863,7 @@ Pipe StorageHDFS::read( else if (is_path_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(context_, uris[0]); + auto glob_iterator = std::make_shared(uris[0], query_info.query, virtual_columns, context_); iterator_wrapper = std::make_shared([glob_iterator]() { return glob_iterator->next(); @@ -741,57 +871,29 @@ Pipe StorageHDFS::read( } else { - auto uris_iterator = std::make_shared(uris, context_); + auto uris_iterator = std::make_shared(uris, query_info.query, virtual_columns, context_); iterator_wrapper = std::make_shared([uris_iterator]() { return uris_iterator->next(); }); } - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = 
storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), getVirtuals()); + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && context_->getSettingsRef().optimize_count_from_files; Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, - block_for_format, - requested_virtual_columns, context_, max_block_size, iterator_wrapper, - columns_description)); + need_only_count, + query_info)); } return Pipe::unitePipes(std::move(pipes)); } @@ -946,12 +1048,19 @@ std::optional StorageHDFS::tryGetColumnsFromCache( { if (path_with_info.info) return path_with_info.info->last_mod_time; + + auto builder = createHDFSBuilder(uri_without_path + "/", ctx->getGlobalContext()->getConfigRef()); + auto fs = createHDFSFS(builder.get()); + auto * hdfs_info = hdfsGetPathInfo(fs.get(), path_with_info.path.c_str()); + if (hdfs_info) + return hdfs_info->mLastMod; + return std::nullopt; }; - String url = fs::path(uri_without_path) / path_with_info.path; + String url = uri_without_path + path_with_info.path; auto cache_key = getKeyForSchemaCache(url, format_name, {}, ctx); - auto columns = schema_cache.tryGet(cache_key, get_last_mod_time); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); if (columns) return columns; } @@ -969,9 +1078,9 @@ void StorageHDFS::addColumnsToCache( auto & schema_cache = getSchemaCache(ctx); Strings sources; sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const PathWithInfo & path_with_info){ return fs::path(uri_without_path) / path_with_info.path; }); + std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); auto cache_keys = getKeysForSchemaCache(sources, format_name, {}, ctx); - schema_cache.addMany(cache_keys, columns); + schema_cache.addManyColumns(cache_keys, columns); } } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 74801b68f73..ffbf4e93ff9 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include namespace DB @@ -29,6 +31,8 @@ public: struct PathWithInfo { + PathWithInfo() = default; + PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} String path; std::optional info; }; @@ -72,7 +76,9 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
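Two related decisions appear in the read path above: whether the format can be asked for only a subset of columns, and whether the query needs only a row count (`need_only_count`). The sketch below reproduces the count-only predicate with plain inputs; the parameter names are stand-ins for the query-info and settings members used in the patch, not real ClickHouse types.

```cpp
#include <iostream>
#include <string>
#include <vector>

/// A count()-only read can skip decoding column values when either the planner
/// already proved the query is a trivial count, or no physical columns were
/// requested at all, provided the corresponding setting is enabled.
bool needOnlyCount(bool optimize_trivial_count,
                   const std::vector<std::string> & requested_columns,
                   bool optimize_count_from_files_setting)
{
    return (optimize_trivial_count || requested_columns.empty())
        && optimize_count_from_files_setting;
}

int main()
{
    std::cout << std::boolalpha
              << needOnlyCount(true, {}, true) << '\n'      /// true: SELECT count() ...
              << needOnlyCount(false, {"x"}, true) << '\n'; /// false: real columns requested
}
```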
- bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context_) const; + + bool supportsSubcolumns() const override { return true; } static ColumnsDescription getTableStructureFromData( const String & format, @@ -82,6 +88,8 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & ctx); + bool supportsTrivialCountOptimization() const override { return true; } + protected: friend class HDFSSource; @@ -118,7 +126,7 @@ public: class DisclosedGlobIterator { public: - DisclosedGlobIterator(ContextPtr context_, const String & uri_); + DisclosedGlobIterator(const String & uri_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context); StorageHDFS::PathWithInfo next(); private: class Impl; @@ -129,7 +137,7 @@ public: class URISIterator { public: - URISIterator(const std::vector & uris_, ContextPtr context); + URISIterator(const std::vector & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context); StorageHDFS::PathWithInfo next(); private: class Impl; @@ -140,28 +148,33 @@ public: using IteratorWrapper = std::function; using StorageHDFSPtr = std::shared_ptr; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - ColumnsDescription columns_description_); + bool need_only_count_, + const SelectQueryInfo & query_info_); String getName() const override; Chunk generate() override; private: + void addNumRowsToCache(const String & path, size_t num_rows); + std::optional tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info); + StorageHDFSPtr storage; Block block_for_format; - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; UInt64 max_block_size; std::shared_ptr file_iterator; ColumnsDescription columns_description; + bool need_only_count; + size_t total_rows_in_file = 0; + SelectQueryInfo query_info; std::unique_ptr read_buf; std::shared_ptr input_format; @@ -169,11 +182,6 @@ private: std::unique_ptr reader; String current_path; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_files_size = 0; - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
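HDFSSource pulls file paths through the `IteratorWrapper` `std::function` declared above and re-creates its read buffer for every file it receives. A simplified, self-contained version of that hand-off is sketched here; `PathList` is a made-up stand-in for the real glob/URI iterators.

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

/// Stand-in for the glob / URI iterators: returns the next path,
/// or an empty string once the listing is exhausted.
struct PathList
{
    std::vector<std::string> paths;
    size_t pos = 0;
    std::string next() { return pos < paths.size() ? paths[pos++] : std::string{}; }
};

int main()
{
    auto list = std::make_shared<PathList>(PathList{{"/data/a.tsv", "/data/b.tsv"}, 0});
    std::function<std::string()> iterator = [list] { return list->next(); };

    /// Each source keeps calling the wrapper until it is exhausted and
    /// re-initializes its reader for every path it receives.
    for (auto path = iterator(); !path.empty(); path = iterator())
        std::cout << "initialize reader for " << path << '\n';
}
```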
bool initialize(); }; diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index b98459aeee3..83655b06cc8 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,8 @@ StorageHDFSCluster::StorageHDFSCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) @@ -76,9 +79,9 @@ void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String } -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const +RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { - auto iterator = std::make_shared(context, uri); + auto iterator = std::make_shared(uri, query, virtual_columns, context); auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 350051ab089..8ad4a83c5b9 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,12 +37,17 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; String uri; String format_name; String compression_method; + NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 00c942fd56b..28fa010b6d2 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -122,6 +122,7 @@ public: String compression_method_, Block sample_block_, ContextPtr context_, + const SelectQueryInfo & query_info_, UInt64 max_block_size_, const StorageHive & storage_, const Names & text_input_field_names_ = {}) @@ -138,6 +139,7 @@ public: , text_input_field_names(text_input_field_names_) , format_settings(getFormatSettings(getContext())) , read_settings(getContext()->getReadSettings()) + , query_info(query_info_) { to_read_block = sample_block; @@ -278,6 +280,7 @@ public: auto input_format = FormatFactory::instance().getInput( format, *read_buf, to_read_block, getContext(), max_block_size, updateFormatSettings(current_file), /* max_parsing_threads */ 1); + input_format->setQueryInfo(query_info, getContext()); Pipe pipe(input_format); if (columns_description.hasDefaults()) @@ -392,6 +395,7 @@ private: const Names & text_input_field_names; FormatSettings format_settings; ReadSettings read_settings; + SelectQueryInfo query_info; HiveFilePtr current_file; String current_path; @@ -831,6 +835,7 @@ Pipe StorageHive::read( compression_method, sample_block, context_, + query_info, max_block_size, *this, text_input_field_names)); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h 
index 604df70f4d0..a3c47d400e2 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -65,7 +65,7 @@ public: NamesAndTypesList getVirtuals() const override; - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns() const; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 76641b656a2..fcf7675d15d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -91,7 +91,7 @@ using IndexSize = ColumnSize; * - data storage structure (compression, etc.) * - concurrent access to data (locks, etc.) */ -class IStorage : public std::enable_shared_from_this, public TypePromotion, public IHints<1, IStorage> +class IStorage : public std::enable_shared_from_this, public TypePromotion, public IHints<> { public: IStorage() = delete; @@ -254,6 +254,10 @@ public: /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } + /// Return true if the trivial count query could be optimized without reading the data at all + /// in totalRows() or totalRowsByPartitionPredicate() methods or with optimized reading in read() method. + virtual bool supportsTrivialCountOptimization() const { return false; } + private: StorageID storage_id; @@ -550,15 +554,15 @@ public: /** * If the storage requires some complicated work on destroying, * then you have two virtual methods: - * - flush() + * - flushAndPrepareForShutdown() * - shutdown() * * @see shutdown() - * @see flush() + * @see flushAndPrepareForShutdown() */ void flushAndShutdown() { - flush(); + flushAndPrepareForShutdown(); shutdown(); } @@ -571,7 +575,7 @@ public: /// Called before shutdown() to flush data to underlying storage /// Data in memory need to be persistent - virtual void flush() {} + virtual void flushAndPrepareForShutdown() {} /// Asks table to stop executing some action identified by action_type /// If table does not support such type of lock, and empty lock is returned @@ -598,7 +602,7 @@ public: /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. /// We do not use mutex because it is not very important that the size could change during the operation. - virtual void checkTableCanBeDropped() const {} + virtual void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const {} /// Similar to above but checks for DETACH. It's only used for DICTIONARIES. virtual void checkTableCanBeDetached() const {} @@ -616,8 +620,6 @@ public: /// NOTE: write-once also does not support INSERTs/merges/... for MergeTree virtual bool isStaticStorage() const; - virtual bool supportsSubsetOfColumns() const { return false; } - /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. 
/// Used for: /// - Simple count() optimization diff --git a/src/Storages/IndicesDescription.h b/src/Storages/IndicesDescription.h index 862df6fe23c..e56642b8c76 100644 --- a/src/Storages/IndicesDescription.h +++ b/src/Storages/IndicesDescription.h @@ -62,7 +62,7 @@ struct IndexDescription }; /// All secondary indices in storage -struct IndicesDescription : public std::vector, IHints<1, IndicesDescription> +struct IndicesDescription : public std::vector, IHints<> { /// Index with name exists bool has(const String & name) const; diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index b2e6129c61c..31d431e27fe 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -61,6 +61,7 @@ KafkaConsumer::KafkaConsumer( , stopped(stopped_) , current(messages.begin()) , topics(_topics) + , exceptions_buffer(EXCEPTIONS_DEPTH) { // called (synchronously, during poll) when we enter the consumer group consumer->set_assignment_callback([this](const cppkafka::TopicPartitionList & topic_partitions) @@ -79,6 +80,7 @@ KafkaConsumer::KafkaConsumer( } assignment = topic_partitions; + num_rebalance_assignments++; }); // called (synchronously, during poll) when we leave the consumer group @@ -106,6 +108,8 @@ KafkaConsumer::KafkaConsumer( cleanUnprocessed(); stalled_status = REBALANCE_HAPPENED; + last_rebalance_timestamp_usec = static_cast(Poco::Timestamp().epochTime()); + assignment.reset(); waited_for_assignment = 0; @@ -118,12 +122,14 @@ KafkaConsumer::KafkaConsumer( // { // LOG_WARNING(log, "Commit error: {}", e.what()); // } + num_rebalance_revocations++; }); consumer->set_rebalance_error_callback([this](cppkafka::Error err) { LOG_ERROR(log, "Rebalance error: {}", err); ProfileEvents::increment(ProfileEvents::KafkaRebalanceErrors); + setExceptionInfo(err); }); } @@ -177,6 +183,7 @@ void KafkaConsumer::drain() else { LOG_ERROR(log, "Error during draining: {}", error); + setExceptionInfo(error); } } @@ -251,6 +258,8 @@ void KafkaConsumer::commit() consumer->commit(); committed = true; print_offsets("Committed offset", consumer->get_offsets_committed(consumer->get_assignment())); + last_commit_timestamp_usec = static_cast(Poco::Timestamp().epochTime()); + num_commits += 1; } catch (const cppkafka::HandleException & e) { @@ -259,7 +268,10 @@ void KafkaConsumer::commit() if (e.get_error() == RD_KAFKA_RESP_ERR__NO_OFFSET) committed = true; else + { LOG_ERROR(log, "Exception during commit attempt: {}", e.what()); + setExceptionInfo(e.what()); + } } --max_retries; } @@ -399,6 +411,8 @@ ReadBufferPtr KafkaConsumer::consume() /// Don't drop old messages immediately, since we may need them for virtual columns. 
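The consumer changes above start recording per-consumer diagnostics (rebalance counters, poll and commit timestamps, and a bounded exception history sized by EXCEPTIONS_DEPTH) for later exposure through a system table. The sketch below shows the general shape of such a bounded, thread-safe buffer using `std::deque`; the actual patch uses `boost::circular_buffer` under the same mutex-guarded access pattern.

```cpp
#include <cstddef>
#include <cstdint>
#include <deque>
#include <iostream>
#include <mutex>
#include <string>
#include <vector>

/// Keep only the most recent `capacity` exception messages, with timestamps,
/// so an asynchronous reader (e.g. a system table) can copy them safely.
class BoundedExceptionBuffer
{
public:
    struct Entry { std::string text; uint64_t timestamp_usec; };

    explicit BoundedExceptionBuffer(size_t capacity_) : capacity(capacity_) {}

    void add(std::string text, uint64_t timestamp_usec)
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (entries.size() == capacity)
            entries.pop_front();
        entries.push_back({std::move(text), timestamp_usec});
    }

    /// Copy under the lock so readers never observe a partially updated buffer.
    std::vector<Entry> snapshot() const
    {
        std::lock_guard<std::mutex> lock(mutex);
        return {entries.begin(), entries.end()};
    }

private:
    const size_t capacity;
    mutable std::mutex mutex;
    std::deque<Entry> entries;
};

int main()
{
    BoundedExceptionBuffer buffer(2);
    buffer.add("broker down", 1);
    buffer.add("rebalance error", 2);
    buffer.add("commit failed", 3); /// evicts "broker down"
    for (const auto & e : buffer.snapshot())
        std::cout << e.timestamp_usec << ' ' << e.text << '\n';
}
```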
auto new_messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(actual_poll_timeout_ms)); + last_poll_timestamp_usec = static_cast(Poco::Timestamp().epochTime()); + num_messages_read += new_messages.size(); resetIfStopped(); if (stalled_status == CONSUMER_STOPPED) @@ -495,6 +509,7 @@ size_t KafkaConsumer::filterMessageErrors() { ProfileEvents::increment(ProfileEvents::KafkaConsumerErrors); LOG_ERROR(log, "Consumer error: {}", error); + setExceptionInfo(error); return true; } return false; @@ -527,4 +542,71 @@ void KafkaConsumer::storeLastReadMessageOffset() } } +void KafkaConsumer::setExceptionInfo(const cppkafka::Error & err, bool with_stacktrace) +{ + setExceptionInfo(err.to_string(), with_stacktrace); +} + +void KafkaConsumer::setExceptionInfo(const std::string & text, bool with_stacktrace) +{ + std::string enriched_text = text; + + if (with_stacktrace) + { + enriched_text.append(StackTrace().toString()); + } + + std::lock_guard lock(exception_mutex); + exceptions_buffer.push_back({enriched_text, static_cast(Poco::Timestamp().epochTime())}); +} + +/* + * Needed until + * https://github.com/mfontanini/cppkafka/pull/309 + * is merged, + * because consumer->get_member_id() contains a leak + */ +std::string KafkaConsumer::getMemberId() const +{ + char * memberid_ptr = rd_kafka_memberid(consumer->get_handle()); + std::string memberid_string = memberid_ptr; + rd_kafka_mem_free(nullptr, memberid_ptr); + return memberid_string; +} + + +KafkaConsumer::Stat KafkaConsumer::getStat() const +{ + KafkaConsumer::Stat::Assignments assignments; + auto cpp_assignments = consumer->get_assignment(); + auto cpp_offsets = consumer->get_offsets_position(cpp_assignments); + + for (size_t num = 0; num < cpp_assignments.size(); ++num) + { + assignments.push_back({ + cpp_assignments[num].get_topic(), + cpp_assignments[num].get_partition(), + cpp_offsets[num].get_offset(), + }); + } + + return { + .consumer_id = getMemberId() /* consumer->get_member_id() */ , + .assignments = std::move(assignments), + .last_poll_time = last_poll_timestamp_usec.load(), + .num_messages_read = num_messages_read.load(), + + .last_commit_timestamp_usec = last_commit_timestamp_usec.load(), + .last_rebalance_timestamp_usec = last_rebalance_timestamp_usec.load(), + .num_commits = num_commits.load(), + .num_rebalance_assignments = num_rebalance_assignments.load(), + .num_rebalance_revocations = num_rebalance_revocations.load(), + .exceptions_buffer = [&](){std::lock_guard lock(exception_mutex); + return exceptions_buffer;}(), + .in_use = in_use.load(), + .rdkafka_stat = [&](){std::lock_guard lock(rdkafka_stat_mutex); + return rdkafka_stat;}(), + }; +} + } diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index feda51a682e..1c3ddd85873 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -20,10 +22,44 @@ namespace Poco namespace DB { +class StorageSystemKafkaConsumers; + using ConsumerPtr = std::shared_ptr; class KafkaConsumer { +public: + struct ExceptionInfo + { + String text; + UInt64 timestamp_usec; + }; + using ExceptionsBuffer = boost::circular_buffer; + + struct Stat // system.kafka_consumers data + { + struct Assignment + { + String topic_str; + Int32 partition_id; + Int64 current_offset; + }; + using Assignments = std::vector; + + String consumer_id; + Assignments assignments; + UInt64 last_poll_time; + UInt64 num_messages_read; + UInt64 last_commit_timestamp_usec; + UInt64 
last_rebalance_timestamp_usec; + UInt64 num_commits; + UInt64 num_rebalance_assignments; + UInt64 num_rebalance_revocations; + KafkaConsumer::ExceptionsBuffer exceptions_buffer; + bool in_use; + std::string rdkafka_stat; + }; + public: KafkaConsumer( ConsumerPtr consumer_, @@ -69,6 +105,18 @@ public: auto currentTimestamp() const { return current[-1].get_timestamp(); } const auto & currentHeaderList() const { return current[-1].get_header_list(); } String currentPayload() const { return current[-1].get_payload(); } + void setExceptionInfo(const cppkafka::Error & err, bool with_stacktrace = true); + void setExceptionInfo(const std::string & text, bool with_stacktrace = true); + void setRDKafkaStat(const std::string & stat_json_string) + { + std::lock_guard lock(rdkafka_stat_mutex); + rdkafka_stat = stat_json_string; + } + void inUse() { in_use = true; } + void notInUse() { in_use = false; } + + // For system.kafka_consumers + Stat getStat() const; private: using Messages = std::vector; @@ -105,12 +153,33 @@ private: std::optional assignment; const Names topics; + /// system.kafka_consumers data is retrieved asynchronously + /// so we have to protect exceptions_buffer + mutable std::mutex exception_mutex; + const size_t EXCEPTIONS_DEPTH = 10; + ExceptionsBuffer exceptions_buffer; + + std::atomic last_exception_timestamp_usec = 0; + std::atomic last_poll_timestamp_usec = 0; + std::atomic num_messages_read = 0; + std::atomic last_commit_timestamp_usec = 0; + std::atomic num_commits = 0; + std::atomic last_rebalance_timestamp_usec = 0; + std::atomic num_rebalance_assignments = 0; + std::atomic num_rebalance_revocations = 0; + std::atomic in_use = 0; + + mutable std::mutex rdkafka_stat_mutex; + std::string rdkafka_stat; + void drain(); void cleanUnprocessed(); void resetIfStopped(); /// Return number of messages with an error. size_t filterMessageErrors(); ReadBufferPtr getNextMessage(); + + std::string getMemberId() const; }; } diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index ba242417058..cd83a6a1422 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -133,6 +133,7 @@ Chunk KafkaSource::generateImpl() { e.addMessage("while parsing Kafka message (topic: {}, partition: {}, offset: {})'", consumer->currentTopic(), consumer->currentPartition(), consumer->currentOffset()); + consumer->setExceptionInfo(e.message()); throw std::move(e); } }; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 2aba76c1a3f..43a3bedfb74 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -250,15 +250,16 @@ StorageKafka::StorageKafka( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , kafka_settings(std::move(kafka_settings_)) - , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value))) - , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value)) - , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value)) + , macros_info{.table_id = table_id_} + , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) + , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) + , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) , client_id( kafka_settings->kafka_client_id.value.empty() ? 
getDefaultClientId(table_id_) - : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value)) + : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) - , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value)) + , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) @@ -415,7 +416,9 @@ void StorageKafka::startup() { try { - pushConsumer(createConsumer(i)); + auto consumer = createConsumer(i); + pushConsumer(consumer); + all_consumers.push_back(consumer); ++num_created_consumers; } catch (const cppkafka::Exception &) @@ -455,6 +458,7 @@ void StorageKafka::shutdown() void StorageKafka::pushConsumer(KafkaConsumerPtr consumer) { std::lock_guard lock(mutex); + consumer->notInUse(); consumers.push_back(consumer); semaphore.set(); CurrentMetrics::sub(CurrentMetrics::KafkaConsumersInUse, 1); @@ -483,6 +487,7 @@ KafkaConsumerPtr StorageKafka::popConsumer(std::chrono::milliseconds timeout) auto consumer = consumers.back(); consumers.pop_back(); CurrentMetrics::add(CurrentMetrics::KafkaConsumersInUse, 1); + consumer->inUse(); return consumer; } @@ -511,7 +516,11 @@ KafkaConsumerPtr StorageKafka::createConsumer(size_t consumer_number) size_t default_queued_min_messages = 100000; // we don't want to decrease the default conf.set("queued.min.messages", std::max(getMaxBlockSize(),default_queued_min_messages)); - updateConfiguration(conf); + /// a reference to the consumer is needed in statistic callback + /// although the consumer does not exist when callback is being registered + /// shared_ptr> comes to the rescue + auto consumer_weak_ptr_ptr = std::make_shared(); + updateConfiguration(conf, consumer_weak_ptr_ptr); // those settings should not be changed by users. conf.set("enable.auto.commit", "false"); // We manually commit offsets after a stream successfully finished @@ -522,13 +531,20 @@ KafkaConsumerPtr StorageKafka::createConsumer(size_t consumer_number) auto consumer_impl = std::make_shared(conf); consumer_impl->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); + KafkaConsumerPtr kafka_consumer_ptr; + /// NOTE: we pass |stream_cancelled| by reference here, so the buffers should not outlive the storage. 
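The comment about the statistics callback describes a small lifetime trick: the callback must be registered on the configuration before the consumer object exists, so it captures a `shared_ptr` to a `weak_ptr` slot that is filled in afterwards. A minimal standalone sketch of that pattern follows; `Consumer` and the callback variable are hypothetical stand-ins for the cppkafka types used in the patch.

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <string>

struct Consumer
{
    void onStats(const std::string & json) { std::cout << "stats: " << json << '\n'; }
};

int main()
{
    /// 1. Create an empty slot that the callback can capture by value now
    ///    and that we can point at the consumer later.
    auto slot = std::make_shared<std::weak_ptr<Consumer>>();

    /// 2. Register the callback before the consumer exists.
    std::function<void(const std::string &)> stats_callback = [slot](const std::string & json)
    {
        if (auto consumer = slot->lock())   /// no-op if the consumer is gone
            consumer->onStats(json);
    };

    /// 3. Create the consumer and fill the slot.
    auto consumer = std::make_shared<Consumer>();
    *slot = consumer;

    stats_callback("{\"rx\": 42}");         /// reaches the consumer
    consumer.reset();
    stats_callback("{\"rx\": 43}");         /// safely dropped
}
```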
if (thread_per_consumer) { auto& stream_cancelled = tasks[consumer_number]->stream_cancelled; - return std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); + kafka_consumer_ptr = std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); } - return std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, tasks.back()->stream_cancelled, topics); + else + { + kafka_consumer_ptr = std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, tasks.back()->stream_cancelled, topics); + } + *consumer_weak_ptr_ptr = kafka_consumer_ptr; + return kafka_consumer_ptr; } size_t StorageKafka::getMaxBlockSize() const @@ -561,7 +577,8 @@ String StorageKafka::getConfigPrefix() const return CONFIG_KAFKA_TAG; } -void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) +void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config, + std::shared_ptr kafka_consumer_weak_ptr_ptr) { // Update consumer configuration from the configuration. Example: // @@ -641,6 +658,26 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message); }); + if (kafka_consumer_weak_ptr_ptr) + { + if (!config.has(config_prefix + "." + "statistics_interval_ms")) + { + kafka_config.set("statistics.interval.ms", "3000"); // every 3 seconds by default. set to 0 to disable. + } + + if (kafka_config.get("statistics.interval.ms") != "0") + { + kafka_config.set_stats_callback([kafka_consumer_weak_ptr_ptr](cppkafka::KafkaHandleBase &, const std::string & stat_json_string) + { + auto kafka_consumer_ptr = kafka_consumer_weak_ptr_ptr->lock(); + if (kafka_consumer_ptr) + { + kafka_consumer_ptr->setRDKafkaStat(stat_json_string); + } + }); + } + } + // Configure interceptor to change thread name // // TODO: add interceptors support into the cppkafka. @@ -695,6 +732,8 @@ void StorageKafka::threadFunc(size_t idx) { assert(idx < tasks.size()); auto task = tasks[idx]; + std::string exception_str; + try { auto table_id = getStorageID(); @@ -734,7 +773,24 @@ void StorageKafka::threadFunc(size_t idx) } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + /// do bare minimum in catch block + LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); + exception_str = getCurrentExceptionMessage(true /* with_stacktrace */); + } + + if (!exception_str.empty()) + { + LOG_ERROR(log, "{} {}", __PRETTY_FUNCTION__, exception_str); + + auto safe_consumers = getSafeConsumers(); + for (auto const & consumer_ptr_weak : safe_consumers.consumers) + { + /// propagate materialized view exception to all consumers + if (auto consumer_ptr = consumer_ptr_weak.lock()) + { + consumer_ptr->setExceptionInfo(exception_str, false /* no stacktrace, reuse passed one */); + } + } } mv_attached.store(false); @@ -809,6 +865,7 @@ bool StorageKafka::streamToViews() // we need to read all consumers in parallel (sequential read may lead to situation // when some of consumers are not used, and will break some Kafka consumer invariants) block_io.pipeline.setNumThreads(stream_count); + block_io.pipeline.setConcurrencyControl(kafka_context->getSettingsRef().use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); @@ -951,7 +1008,7 @@ void registerStorageKafka(StorageFactory & factory) "of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, " "and ensure you have enough threads " "in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " - "See also https://clickhouse.com/docs/integrations/kafka/kafka-table-engine#tuning-performance", max_consumers); + "See also https://clickhouse.com/docs/en/integrations/kafka#tuning-performance", max_consumers); } else if (num_consumers < 1) { diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 09aa091ef18..77e1370c2b7 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -22,9 +23,12 @@ class Configuration; namespace DB { +class StorageSystemKafkaConsumers; + struct StorageKafkaInterceptors; using KafkaConsumerPtr = std::shared_ptr; +using KafkaConsumerWeakPtr = std::weak_ptr; /** Implements a Kafka queue table engine that can be used as a persistent queue / buffer, * or as a basic building block for creating pipelines with a continuous insertion / ETL. @@ -76,9 +80,19 @@ public: Names getVirtualColumnNames() const; HandleKafkaErrorMode getHandleKafkaErrorMode() const { return kafka_settings->kafka_handle_error_mode; } + struct SafeConsumers + { + std::shared_ptr storage_ptr; + std::unique_lock lock; + std::vector & consumers; + }; + + SafeConsumers getSafeConsumers() { return {shared_from_this(), std::unique_lock(mutex), all_consumers}; } + private: // Configuration and state std::unique_ptr kafka_settings; + Macros::MacroExpansionInfo macros_info; const Names topics; const String brokers; const String group; @@ -99,6 +113,7 @@ private: size_t num_created_consumers = 0; /// number of actually created consumers. std::vector consumers; /// available consumers + std::vector all_consumers; /// busy (belong to a KafkaSource) and vacant consumers std::mutex mutex; @@ -127,7 +142,12 @@ private: std::atomic shutdown_called = false; // Update Kafka configuration with values from CH user configuration. 
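`threadFunc` above deliberately keeps its catch block minimal: it only captures the exception text, and all logging plus fan-out to the consumers happens after the try/catch. A simplified sketch of that structure follows; `doWork` and `notifyConsumers` are placeholders for the real stream and consumer calls, and the message capture stands in for `getCurrentExceptionMessage`.

```cpp
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

void doWork() { throw std::runtime_error("materialized view failed"); }

void notifyConsumers(std::vector<std::string> & consumer_errors, const std::string & text)
{
    consumer_errors.push_back(text); /// propagate the same message to every consumer
}

int main()
{
    std::vector<std::string> consumer_errors;
    std::string exception_str;

    try
    {
        doWork();
    }
    catch (...)
    {
        /// Do the bare minimum while the exception is in flight.
        try { std::rethrow_exception(std::current_exception()); }
        catch (const std::exception & e) { exception_str = e.what(); }
        catch (...) { exception_str = "unknown error"; }
    }

    /// Heavier work (logging, notifying consumers) happens outside the catch.
    if (!exception_str.empty())
    {
        std::cerr << "threadFunc: " << exception_str << '\n';
        notifyConsumers(consumer_errors, exception_str);
    }
}
```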
- void updateConfiguration(cppkafka::Configuration & kafka_config); + void updateConfiguration(cppkafka::Configuration & kafka_config, std::shared_ptr); + void updateConfiguration(cppkafka::Configuration & kafka_config) + { + updateConfiguration(kafka_config, std::make_shared()); + } + String getConfigPrefix() const; void threadFunc(size_t idx); @@ -140,6 +160,7 @@ private: bool streamToViews(); bool checkDependencies(const StorageID & table_id); + }; } diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index e163400f2af..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -32,11 +32,8 @@ public: void onFinish() override { - UInt128 key; - String key_str; - - new_hash->get128(key); - key_str = getHexUIntLowercase(key); + const auto key = new_hash->get128(); + const auto key_str = getHexUIntLowercase(key); std::lock_guard lock(storage.mutex); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 173bb128c4a..aec2405b973 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -263,7 +263,7 @@ NamesAndTypesList StorageLiveView::getVirtuals() const }; } -void StorageLiveView::checkTableCanBeDropped() const +void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); @@ -478,7 +478,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) }); auto executor = pipeline.execute(); - executor->execute(pipeline.getNumThreads()); + executor->execute(pipeline.getNumThreads(), local_context->getSettingsRef().use_concurrency_control); } void StorageLiveView::refresh() @@ -681,7 +681,6 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) { SipHash hash; - UInt128 key; BlocksPtr new_blocks = std::make_shared(); BlocksMetadataPtr new_blocks_metadata = std::make_shared(); @@ -713,7 +712,7 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) new_blocks->push_back(block); } - hash.get128(key); + const auto key = hash.get128(); /// Update blocks only if hash keys do not match /// NOTE: hash could be different for the same result diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 002cbf96ebe..92ffd4dc642 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -75,7 +75,7 @@ public: NamesAndTypesList getVirtuals() const override; - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void drop() override; diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index 0f4af57fc8e..bcb6ae96c68 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -41,22 +41,15 @@ private: using Base = CacheBase; public: - explicit MarkCache(size_t max_size_in_bytes) - : Base(max_size_in_bytes) {} - - MarkCache(const String & mark_cache_policy, size_t max_size_in_bytes) - : Base(mark_cache_policy, max_size_in_bytes) {} + MarkCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) + : Base(cache_policy, max_size_in_bytes, 0, size_ratio) {} /// Calculate key from path to file and offset. 
static UInt128 hash(const String & path_to_file) { - UInt128 key; - SipHash hash; hash.update(path_to_file.data(), path_to_file.size() + 1); - hash.get128(key); - - return key; + return hash.get128(); } template diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h index 310890eba1e..5092fbdd864 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h +++ b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h @@ -9,6 +9,9 @@ namespace DB { +static constexpr auto DISTANCE_FUNCTION_L2 = "L2Distance"; +static constexpr auto DISTANCE_FUNCTION_COSINE = "cosineDistance"; + /// Approximate Nearest Neighbour queries have a similar structure: /// - reference vector from which all distances are calculated /// - metric name (e.g L2Distance, LpDistance, etc.) diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index 23b6668c8d8..a9cdd09e061 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -18,7 +18,9 @@ namespace CurrentMetrics namespace DB { -struct AsyncBlockIDsCache::Cache : public std::unordered_set + +template +struct AsyncBlockIDsCache::Cache : public std::unordered_set { CurrentMetrics::Increment cache_size_increment; explicit Cache(std::unordered_set && set_) @@ -27,7 +29,8 @@ struct AsyncBlockIDsCache::Cache : public std::unordered_set {} }; -std::vector AsyncBlockIDsCache::getChildren() +template +std::vector AsyncBlockIDsCache::getChildren() { auto zookeeper = storage.getZooKeeper(); @@ -50,7 +53,8 @@ std::vector AsyncBlockIDsCache::getChildren() return children; } -void AsyncBlockIDsCache::update() +template +void AsyncBlockIDsCache::update() try { std::vector paths = getChildren(); @@ -73,24 +77,27 @@ catch (...) task->scheduleAfter(update_min_interval.count()); } -AsyncBlockIDsCache::AsyncBlockIDsCache(StorageReplicatedMergeTree & storage_) +template +AsyncBlockIDsCache::AsyncBlockIDsCache(TStorage & storage_) : storage(storage_), update_min_interval(storage.getSettings()->async_block_ids_cache_min_update_interval_ms), - path(storage.zookeeper_path + "/async_blocks"), + path(storage.getZooKeeperPath() + "/async_blocks"), log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)"), log(&Poco::Logger::get(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ update(); }); } -void AsyncBlockIDsCache::start() +template +void AsyncBlockIDsCache::start() { if (storage.getSettings()->use_async_block_ids_cache) task->activateAndSchedule(); } /// Caller will keep the version of last call. When the caller calls again, it will wait util gets a newer version. 
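The contract in the comment above is that a caller remembers the version it last saw and, on the next call, blocks until a strictly newer version has been published. A minimal sketch of that versioned hand-off with a condition variable follows; the payload here is just a string set for illustration, and the real cache additionally bounds the wait and returns conflicting block IDs.

```cpp
#include <condition_variable>
#include <cstdint>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_set>

class VersionedCache
{
public:
    /// Publisher side: install a new snapshot and bump the version.
    void update(std::unordered_set<std::string> snapshot)
    {
        {
            std::lock_guard<std::mutex> lock(mutex);
            data = std::move(snapshot);
            ++version;
        }
        cv.notify_all();
    }

    /// Consumer side: block until the cache is newer than what we saw last time.
    std::unordered_set<std::string> get(uint64_t & last_version)
    {
        std::unique_lock<std::mutex> lock(mutex);
        cv.wait(lock, [&] { return version > last_version; });
        last_version = version;
        return data;
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    uint64_t version = 0;
    std::unordered_set<std::string> data;
};

int main()
{
    VersionedCache cache;
    uint64_t seen = 0;

    std::thread updater([&] { cache.update({"block_1", "block_2"}); });
    auto snapshot = cache.get(seen); /// waits for the first update, then seen == 1
    updater.join();

    std::cout << "entries: " << snapshot.size() << ", version: " << seen << '\n';
}
```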
-Strings AsyncBlockIDsCache::detectConflicts(const Strings & paths, UInt64 & last_version) +template +Strings AsyncBlockIDsCache::detectConflicts(const Strings & paths, UInt64 & last_version) { if (!storage.getSettings()->use_async_block_ids_cache) return {}; @@ -128,4 +135,6 @@ Strings AsyncBlockIDsCache::detectConflicts(const Strings & paths, UInt64 & last return conflicts; } +template class AsyncBlockIDsCache; + } diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h index 91d549a0501..fbd97fd00ff 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.h +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h @@ -8,8 +8,7 @@ namespace DB { -class StorageReplicatedMergeTree; - +template class AsyncBlockIDsCache { struct Cache; @@ -20,7 +19,7 @@ class AsyncBlockIDsCache void update(); public: - explicit AsyncBlockIDsCache(StorageReplicatedMergeTree & storage_); + explicit AsyncBlockIDsCache(TStorage & storage_); void start(); @@ -30,7 +29,7 @@ public: private: - StorageReplicatedMergeTree & storage; + TStorage & storage; std::atomic last_updatetime; const std::chrono::milliseconds update_min_interval; @@ -48,6 +47,4 @@ private: Poco::Logger * log; }; -using AsyncBlockIDsCachePtr = std::shared_ptr; - } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index e1921f45eda..27d8991bd62 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -331,6 +331,7 @@ void DataPartStorageOnDiskBase::backup( const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const @@ -350,7 +351,7 @@ void DataPartStorageOnDiskBase::backup( temp_dir_it = temp_dirs->emplace(disk, std::make_shared(disk, "tmp/")).first; temp_dir_owner = temp_dir_it->second; - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); temp_part_dir = temp_dir / part_path_in_backup.relative_path(); disk->createDirectories(temp_part_dir); } @@ -382,7 +383,7 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { - backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, copy_encrypted)); + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); continue; } @@ -415,6 +416,7 @@ void DataPartStorageOnDiskBase::backup( MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( const std::string & to, const std::string & dir_path, + const WriteSettings & settings, std::function save_metadata_callback, const ClonePartParams & params) const { @@ -424,8 +426,16 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( else disk->createDirectories(to); - localBackup(disk, getRelativePath(), fs::path(to) / dir_path, params.make_source_readonly, {}, params.copy_instead_of_hardlink, - params.files_to_copy_instead_of_hardlinks, params.external_transaction); + localBackup( + disk, + getRelativePath(), + fs::path(to) / dir_path, + settings, + params.make_source_readonly, + /* max_level= */ {}, + params.copy_instead_of_hardlink, + params.files_to_copy_instead_of_hardlinks, + params.external_transaction); if (save_metadata_callback) save_metadata_callback(disk); @@ -456,6 +466,7 @@ 
MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, const DiskPtr & dst_disk, + const WriteSettings & write_settings, Poco::Logger * log) const { String path_to_clone = fs::path(to) / dir_path / ""; @@ -471,7 +482,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( try { dst_disk->createDirectories(to); - src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone); + src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone, write_settings); } catch (...) { diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 648bc908f59..0adf048b56a 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -55,6 +55,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const override; @@ -62,6 +63,7 @@ public: MutableDataPartStoragePtr freeze( const std::string & to, const std::string & dir_path, + const WriteSettings & settings, std::function save_metadata_callback, const ClonePartParams & params) const override; @@ -69,6 +71,7 @@ public: const std::string & to, const std::string & dir_path, const DiskPtr & dst_disk, + const WriteSettings & write_settings, Poco::Logger * log) const override; void rename( diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6a3bf2940e9..4545b2b98ae 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -203,6 +203,8 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write sendPartFromMemory(part, out, send_projections); else sendPartFromDisk(part, out, client_protocol_version, false, send_projections); + + data.addLastSentPart(part->info); } catch (const NetException &) { diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index aa0c1fccbc3..5b798ecc8a5 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -243,6 +242,15 @@ void GinIndexStore::finalize() { if (!current_postings.empty()) writeSegment(); + + if (metadata_file_stream) + metadata_file_stream->finalize(); + + if (dict_file_stream) + dict_file_stream->finalize(); + + if (postings_file_stream) + postings_file_stream->finalize(); } void GinIndexStore::initFileStreams() @@ -319,13 +327,8 @@ void GinIndexStore::writeSegment() current_segment.segment_id = getNextSegmentID(); metadata_file_stream->sync(); - metadata_file_stream->finalize(); - dict_file_stream->sync(); - dict_file_stream->finalize(); - postings_file_stream->sync(); - postings_file_stream->finalize(); } GinIndexStoreDeserializer::GinIndexStoreDeserializer(const GinIndexStorePtr & store_) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 8dbf5caa168..c76b17f3370 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -55,8 +55,6 @@ struct MergeTreeDataPartChecksums; class IReservation; using ReservationPtr = std::unique_ptr; -class IStoragePolicy; - class IDisk; using DiskPtr = std::shared_ptr; @@ 
-221,6 +219,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const = 0; @@ -241,7 +240,7 @@ public: MergeTreeTransactionPtr txn = NO_TRANSACTION_PTR; HardlinkedFiles * hardlinked_files = nullptr; bool copy_instead_of_hardlink = false; - NameSet files_to_copy_instead_of_hardlinks; + NameSet files_to_copy_instead_of_hardlinks = {}; bool keep_metadata_version = false; bool make_source_readonly = false; DiskTransactionPtr external_transaction = nullptr; @@ -251,6 +250,7 @@ public: virtual std::shared_ptr freeze( const std::string & to, const std::string & dir_path, + const WriteSettings & settings, std::function save_metadata_callback, const ClonePartParams & params) const = 0; @@ -259,6 +259,7 @@ public: const std::string & to, const std::string & dir_path, const DiskPtr & disk, + const WriteSettings & write_settings, Poco::Logger * log) const = 0; /// Change part's root. from_root should be a prefix path of current root path. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 7050a98a4bc..54a169fc779 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -320,7 +319,6 @@ IMergeTreeDataPart::IMergeTreeDataPart( , part_type(part_type_) , parent_part(parent_part_) , parent_part_name(parent_part ? parent_part->name : "") - , use_metadata_cache(storage.use_metadata_cache) { if (parent_part) { @@ -1673,14 +1671,7 @@ std::pair IMergeTreeDataPart::canRemovePart() const void IMergeTreeDataPart::initializePartMetadataManager() { -#if USE_ROCKSDB - if (use_metadata_cache) - metadata_manager = std::make_shared(this, storage.getContext()->getMergeTreeMetadataCache()); - else - metadata_manager = std::make_shared(this); -#else - metadata_manager = std::make_shared(this); -#endif + metadata_manager = std::make_shared(this); } void IMergeTreeDataPart::initializeIndexGranularityInfo() @@ -1780,7 +1771,8 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix) part_is_probably_removed_from_disk = true; } -DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const +DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/, + const DiskTransactionPtr & disk_transaction) const { /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. 
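`makeCloneInDetached` above guards against polluting detached/ with repeated `_tryN` copies of the same broken part. The sketch below shows the naming scheme such a guard works with: probe the base name first, then numbered `_try` suffixes up to a fixed attempt limit. The helper and the 10-attempt cap mirror the comment, not the exact ClickHouse routine.

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <optional>
#include <set>
#include <string>

/// Return the first free name among "<base>", "<base>_try1", ...,
/// or nothing once max_attempts candidates are all taken.
std::optional<std::string> chooseDetachedName(
    const std::string & base_name,
    const std::function<bool(const std::string &)> & exists,
    size_t max_attempts = 10)
{
    for (size_t attempt = 0; attempt < max_attempts; ++attempt)
    {
        std::string candidate = base_name;
        if (attempt > 0)
            candidate += "_try" + std::to_string(attempt);
        if (!exists(candidate))
            return candidate;
    }
    return std::nullopt;
}

int main()
{
    std::set<std::string> detached = {"broken_all_1_1_0", "broken_all_1_1_0_try1"};
    auto name = chooseDetachedName("broken_all_1_1_0",
                                   [&](const std::string & n) { return detached.count(n) > 0; });
    std::cout << name.value_or("<gave up>") << '\n'; /// broken_all_1_1_0_try2
}
```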
@@ -1795,16 +1787,18 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix IDataPartStorage::ClonePartParams params { .copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, - .make_source_readonly = true + .make_source_readonly = true, + .external_transaction = disk_transaction }; return getDataPartStorage().freeze( storage.relative_data_path, *maybe_path_in_detached, - /*save_metadata_callback=*/ {}, + Context::getGlobalContextInstance()->getWriteSettings(), + /* save_metadata_callback= */ {}, params); } -MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const +MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name, const WriteSettings & write_settings) const { assertOnDisk(); @@ -1814,7 +1808,7 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to empty directory.", name); String path_to_clone = fs::path(storage.relative_data_path) / directory_name / ""; - return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); + return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, write_settings, storage.log); } UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const @@ -1915,6 +1909,13 @@ void IMergeTreeDataPart::checkConsistency(bool /* require_part_metadata */) cons throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'checkConsistency' is not implemented for part with type {}", getType().toString()); } +void IMergeTreeDataPart::checkConsistencyWithProjections(bool require_part_metadata) const +{ + checkConsistency(require_part_metadata); + for (const auto & [_, proj_part] : projection_parts) + proj_part->checkConsistency(require_part_metadata); +} + void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() { calculateColumnsSizesOnDisk(); @@ -1983,6 +1984,12 @@ IndexSize IMergeTreeDataPart::getSecondaryIndexSize(const String & secondary_ind return ColumnSize{}; } +bool IMergeTreeDataPart::hasSecondaryIndex(const String & index_name) const +{ + auto file_name = INDEX_FILE_PREFIX + index_name; + return checksums.has(file_name + ".idx") || checksums.has(file_name + ".idx2"); +} + void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) const { for (const auto & [column_name, size] : columns_sizes) @@ -2052,42 +2059,8 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const hash.update(token.data(), token.size()); } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); - - return info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]); -} - -IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const String & file_name) const -{ - assert(use_metadata_cache); - - const auto filenames_without_checksums = getFileNamesWithoutChecksums(); - auto it = checksums.files.find(file_name); - if (!filenames_without_checksums.contains(file_name) && it != checksums.files.end()) - { - return it->second.file_hash; - } - - if (!getDataPartStorage().exists(file_name)) - { - return {}; - } - std::unique_ptr in_file = getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); - 
HashingReadBuffer in_hash(*in_file); - - String value; - readStringUntilEOF(value, in_hash); - return in_hash.getHash(); -} - -std::unordered_map IMergeTreeDataPart::checkMetadata() const -{ - return metadata_manager->check(); + const auto hash_value = hash.get128(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); } bool isCompactPart(const MergeTreeDataPartPtr & data_part) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index af6906e004d..97c9b81ce87 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -89,7 +89,7 @@ public: virtual MergeTreeReaderPtr getReader( const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -122,6 +122,9 @@ public: /// Otherwise return information about secondary index size on disk. IndexSize getSecondaryIndexSize(const String & secondary_index_name) const; + /// Returns true if there is materialized index with specified name in part. + bool hasSecondaryIndex(const String & index_name) const; + /// Return information about column size on disk for all columns in part ColumnSize getTotalColumnsSize() const { return total_columns_size; } @@ -261,6 +264,8 @@ public: /// Some old parts don't have metadata version, so we set it to the current table's version when loading the part bool old_part_with_no_metadata_version_on_disk = false; + bool new_part_was_committed_to_zookeeper_after_rename_on_disk = false; + using TTLInfo = MergeTreeDataPartTTLInfo; using TTLInfos = MergeTreeDataPartTTLInfos; @@ -368,10 +373,11 @@ public: virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists); /// Makes clone of a part in detached/ directory via hard links - virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const; + virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const; /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk - MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; + MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name, const WriteSettings & write_settings) const; /// Checks that .bin and .mrk files exist. /// @@ -475,12 +481,6 @@ public: /// Required for keep data on remote FS when part has shadow copies. UInt32 getNumberOfRefereneces() const; - /// Get checksums of metadata file in part directory - IMergeTreeDataPart::uint128 getActualChecksumByFile(const String & file_name) const; - - /// Check metadata in cache is consistent with actual metadata on disk(if use_metadata_cache is true) - std::unordered_map checkMetadata() const; - /// True if the part supports lightweight delete mutate. bool supportLightweightDeleteMutate() const; @@ -489,6 +489,12 @@ public: void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); + /// Checks the consistency of this data part. 
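The new `hasSecondaryIndex` above answers "is this index materialized in the part?" by looking for either of the two on-disk index file extensions in the part's checksums. A simplified stand-alone version of that lookup follows; the checksum list is modelled as a plain set of file names, and the `skp_idx_` prefix is hard-coded here for illustration.

```cpp
#include <iostream>
#include <set>
#include <string>

constexpr const char * INDEX_FILE_PREFIX = "skp_idx_";

/// An index is materialized in a part if either the plain (.idx) or the
/// newer (.idx2) file for it is present in the part's checksums.
bool hasSecondaryIndex(const std::set<std::string> & checksum_files, const std::string & index_name)
{
    const std::string file_name = std::string(INDEX_FILE_PREFIX) + index_name;
    return checksum_files.count(file_name + ".idx") > 0
        || checksum_files.count(file_name + ".idx2") > 0;
}

int main()
{
    std::set<std::string> files = {"data.bin", "data.cmrk3", "skp_idx_ann.idx2"};
    std::cout << std::boolalpha
              << hasSecondaryIndex(files, "ann") << '\n'       /// true
              << hasSecondaryIndex(files, "minmax_x") << '\n'; /// false
}
```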
+ virtual void checkConsistency(bool require_part_metadata) const; + + /// Checks the consistency of this data part, and check the consistency of its projections (if any) as well. + void checkConsistencyWithProjections(bool require_part_metadata) const; + /// "delete-on-destroy.txt" is deprecated. It is no longer being created, only is removed. /// TODO: remove this method after some time. void removeDeleteOnDestroyMarker(); @@ -530,14 +536,10 @@ protected: std::map> projection_parts; - /// Disabled when USE_ROCKSDB is OFF or use_metadata_cache is set to false in merge tree settings - bool use_metadata_cache = false; - mutable PartMetadataManagerPtr metadata_manager; void removeIfNeeded(); - virtual void checkConsistency(bool require_part_metadata) const; void checkConsistencyBase() const; /// Fill each_columns_size and total_size with sizes from columns files on diff --git a/src/Storages/MergeTree/IMergeTreeReadPool.h b/src/Storages/MergeTree/IMergeTreeReadPool.h index efdfca51c0a..944d933e167 100644 --- a/src/Storages/MergeTree/IMergeTreeReadPool.h +++ b/src/Storages/MergeTree/IMergeTreeReadPool.h @@ -3,24 +3,27 @@ #include #include #include -#include +#include namespace DB { -struct MergeTreeReadTask; -using MergeTreeReadTaskPtr = std::unique_ptr; - +/// The interface that determines how tasks for reading (MergeTreeReadTask) +/// are distributed among data parts with ranges. class IMergeTreeReadPool : private boost::noncopyable { public: virtual ~IMergeTreeReadPool() = default; - + virtual String getName() const = 0; virtual Block getHeader() const = 0; - virtual MergeTreeReadTaskPtr getTask(size_t thread) = 0; + /// Returns true if tasks are returned in the same order as the order of ranges passed to pool + virtual bool preservesOrderOfRanges() const = 0; + /// task_idx is an implementation defined identifier that helps + /// to get required task. E.g. it may be number of thread in case of Default reading type or an index of a part in case of InOrder/InReverseOrder reading type. 
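    /// A minimal usage sketch, assuming a `pool` pointer and a `thread_idx` chosen by the caller
    /// (both names are assumptions for illustration, not part of the interface below):
    ///
    ///     MergeTreeReadTaskPtr task;
    ///     while (auto next = pool->getTask(thread_idx, task.get()))
    ///         task = std::move(next);  /// hand the finished task back so the pool can reuse its state
    ///
    /// i.e. the previously returned task is passed back so the pool can take it into account
    /// when building the next one (for example, when it keeps reading the same part).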
+ virtual MergeTreeReadTaskPtr getTask(size_t task_idx, MergeTreeReadTask * previous_task) = 0; virtual void profileFeedback(ReadBufferFromFileBase::ProfileInfo info) = 0; }; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index f9b97a6a05d..4bb8c400691 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -35,7 +35,7 @@ IMergeTreeReader::IMergeTreeReader( , uncompressed_cache(uncompressed_cache_) , mark_cache(mark_cache_) , settings(settings_) - , metadata_snapshot(metadata_snapshot_) + , storage_snapshot(storage_snapshot_) , all_mark_ranges(all_mark_ranges_) , alter_conversions(data_part_info_for_read->getAlterConversions()) /// For wide parts convert plain arrays of Nested to subcolumns @@ -71,7 +71,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e res_columns, num_rows, Nested::convertToSubcolumns(requested_columns), Nested::convertToSubcolumns(available_columns), - partially_read_columns, metadata_snapshot); + partially_read_columns, storage_snapshot->metadata); should_evaluate_missing_defaults = std::any_of( res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; }); @@ -110,7 +110,10 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns } auto dag = DB::evaluateMissingDefaults( - additional_columns, requested_columns, metadata_snapshot->getColumns(), data_part_info_for_read->getContext()); + additional_columns, requested_columns, + storage_snapshot->metadata->getColumns(), + data_part_info_for_read->getContext()); + if (dag) { dag->addMaterializingOutputActions(); @@ -216,7 +219,7 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const } } -IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const +IMergeTreeReader::ColumnNameLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const { auto get_offsets_streams = [](const auto & serialization, const auto & name_in_storage) { @@ -238,11 +241,11 @@ IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(con auto required_offsets_streams = get_offsets_streams(getSerializationInPart(required_column), required_name_in_storage); size_t max_matched_streams = 0; - ColumnPositionLevel position_level; + ColumnNameLevel name_level; /// Find column that has maximal number of matching /// offsets columns with required_column. 
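    /// For example, if the reader asks for a missing nested subcolumn `n.b` while the part
    /// only stores `n.a` (names are illustrative), the offsets streams of `n.a` share the most
    /// streams with `n.b`, so its offsets can be reused to build a correctly sized default column.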
- for (const auto & part_column : data_part_info_for_read->getColumns()) + for (const auto & part_column : Nested::convertToSubcolumns(data_part_info_for_read->getColumns())) { auto name_in_storage = Nested::extractTableName(part_column.name); if (name_in_storage != required_name_in_storage) @@ -261,14 +264,14 @@ IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(con it = current_it; } - if (i && (!position_level || i > max_matched_streams)) + if (i && (!name_level || i > max_matched_streams)) { max_matched_streams = i; - position_level.emplace(*data_part_info_for_read->getColumnPosition(part_column.name), it->second); + name_level.emplace(part_column.name, it->second); } } - return position_level; + return name_level; } void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index fcab35fb4c2..a7e60254217 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -24,7 +23,7 @@ public: IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -92,22 +91,23 @@ protected: MergeTreeReaderSettings settings; - StorageMetadataPtr metadata_snapshot; + StorageSnapshotPtr storage_snapshot; MarkRanges all_mark_ranges; /// Position and level (of nesting). - using ColumnPositionLevel = std::optional>; + using ColumnNameLevel = std::optional>; + /// In case of part of the nested column does not exists, offsets should be /// read, but only the offsets for the current column, that is why it /// returns pair of size_t, not just one. - ColumnPositionLevel findColumnForOffsets(const NameAndTypePair & column) const; + ColumnNameLevel findColumnForOffsets(const NameAndTypePair & column) const; NameSet partially_read_columns; -private: /// Alter conversions, which must be applied on fly if required AlterConversionsPtr alter_conversions; +private: /// Columns that are requested to read. NamesAndTypesList requested_columns; diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h index d9e97d91518..cef1d10e4ad 100644 --- a/src/Storages/MergeTree/IPartMetadataManager.h +++ b/src/Storages/MergeTree/IPartMetadataManager.h @@ -20,7 +20,6 @@ using DiskPtr = std::shared_ptr; /// - PartMetadataManagerOrdinary: manage metadata from disk directly. deleteAll/assertAllDeleted/updateAll/check /// are all empty implementations because they are not needed for PartMetadataManagerOrdinary(those operations /// are done implicitly when removing or renaming part directory). -/// - PartMetadataManagerWithCache: manage metadata from RocksDB cache and disk. 
class IPartMetadataManager { public: diff --git a/src/Storages/MergeTree/InsertBlockInfo.cpp b/src/Storages/MergeTree/InsertBlockInfo.cpp new file mode 100644 index 00000000000..ac900f8cf09 --- /dev/null +++ b/src/Storages/MergeTree/InsertBlockInfo.cpp @@ -0,0 +1,150 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +AsyncInsertBlockInfo::AsyncInsertBlockInfo( + Poco::Logger * log_, + std::vector && block_id_, + BlockWithPartition && block_, + std::optional && unmerged_block_with_partition_) + : log(log_) + , block_id(std::move(block_id_)) + , block_with_partition(std::move(block_)) + , unmerged_block_with_partition(std::move(unmerged_block_with_partition_)) +{ + initBlockIDMap(); +} + +void AsyncInsertBlockInfo::initBlockIDMap() +{ + block_id_to_offset_idx.clear(); + for (size_t i = 0; i < block_id.size(); ++i) + { + block_id_to_offset_idx[block_id[i]].push_back(i); + } +} + +/// this function check if the block contains duplicate inserts. +/// if so, we keep only one insert for every duplicate ones. +bool AsyncInsertBlockInfo::filterSelfDuplicate() +{ + std::vector dup_block_ids; + for (const auto & [hash_id, offset_indexes] : block_id_to_offset_idx) + { + /// It means more than one inserts have the same hash id, in this case, we should keep only one of them. + if (offset_indexes.size() > 1) + dup_block_ids.push_back(hash_id); + } + if (dup_block_ids.empty()) + return false; + + filterBlockDuplicate(dup_block_ids, true); + return true; +} + +/// remove the conflict parts of block for rewriting again. +void AsyncInsertBlockInfo::filterBlockDuplicate(const std::vector & block_paths, bool self_dedup) +{ + auto * current_block_with_partition = unmerged_block_with_partition.has_value() ? &unmerged_block_with_partition.value() : &block_with_partition; + std::vector offset_idx; + for (const auto & raw_path : block_paths) + { + std::filesystem::path p(raw_path); + String conflict_block_id = p.filename(); + auto it = block_id_to_offset_idx.find(conflict_block_id); + if (it == block_id_to_offset_idx.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown conflict path {}", conflict_block_id); + /// if this filter is for self_dedup, that means the block paths is selected by `filterSelfDuplicate`, which is a self purge. + /// in this case, we don't know if zk has this insert, then we should keep one insert, to avoid missing this insert. + offset_idx.insert(std::end(offset_idx), std::begin(it->second) + self_dedup, std::end(it->second)); + } + std::sort(offset_idx.begin(), offset_idx.end()); + + auto & offsets = current_block_with_partition->offsets; + size_t idx = 0, remove_count = 0; + auto it = offset_idx.begin(); + std::vector new_offsets; + std::vector new_block_ids; + + /// construct filter + size_t rows = current_block_with_partition->block.rows(); + auto filter_col = ColumnUInt8::create(rows, 1u); + ColumnUInt8::Container & vec = filter_col->getData(); + UInt8 * pos = vec.data(); + for (auto & offset : offsets) + { + if (it != offset_idx.end() && *it == idx) + { + size_t start_pos = idx > 0 ? 
offsets[idx - 1] : 0; + size_t end_pos = offset; + remove_count += end_pos - start_pos; + while (start_pos < end_pos) + { + *(pos + start_pos) = 0; + start_pos++; + } + it++; + } + else + { + new_offsets.push_back(offset - remove_count); + new_block_ids.push_back(block_id[idx]); + } + idx++; + } + + LOG_TRACE(log, "New block IDs: {}, new offsets: {}, size: {}", toString(new_block_ids), toString(new_offsets), new_offsets.size()); + + current_block_with_partition->offsets = std::move(new_offsets); + block_id = std::move(new_block_ids); + auto cols = current_block_with_partition->block.getColumns(); + for (auto & col : cols) + { + col = col->filter(vec, rows - remove_count); + } + current_block_with_partition->block.setColumns(cols); + + LOG_TRACE(log, "New block rows {}", current_block_with_partition->block.rows()); + + initBlockIDMap(); + + if (unmerged_block_with_partition.has_value()) + block_with_partition.block = unmerged_block_with_partition->block; +} + +std::vector AsyncInsertBlockInfo::getHashesForBlocks(BlockWithPartition & block, String partition_id) +{ + size_t start = 0; + auto cols = block.block.getColumns(); + std::vector block_id_vec; + for (size_t i = 0; i < block.offsets.size(); ++i) + { + size_t offset = block.offsets[i]; + std::string_view token = block.tokens[i]; + if (token.empty()) + { + SipHash hash; + for (size_t j = start; j < offset; ++j) + { + for (const auto & col : cols) + col->updateHashWithValue(j, hash); + } + + const auto hash_value = hash.get128(); + block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.items[0]) + "_" + DB::toString(hash_value.items[1])); + } + else + block_id_vec.push_back(partition_id + "_" + std::string(token)); + + start = offset; + } + return block_id_vec; +} + +} diff --git a/src/Storages/MergeTree/InsertBlockInfo.h b/src/Storages/MergeTree/InsertBlockInfo.h new file mode 100644 index 00000000000..3882373c0fa --- /dev/null +++ b/src/Storages/MergeTree/InsertBlockInfo.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace DB +{ + +struct SyncInsertBlockInfo +{ + SyncInsertBlockInfo( + Poco::Logger * /*log_*/, + std::string && block_id_, + BlockWithPartition && /*block_*/, + std::optional && /*unmerged_block_with_partition_*/) + : block_id(std::move(block_id_)) + { + } + + explicit SyncInsertBlockInfo(std::string block_id_) + : block_id(std::move(block_id_)) + {} + + std::string block_id; +}; + +struct AsyncInsertBlockInfo +{ + Poco::Logger * log; + std::vector block_id; + BlockWithPartition block_with_partition; + /// Some merging algorithms can mofidy the block which loses the information about the async insert offsets + /// when preprocessing or filtering data for asnyc inserts deduplication we want to use the initial, unmerged block + std::optional unmerged_block_with_partition; + std::unordered_map> block_id_to_offset_idx; + + AsyncInsertBlockInfo( + Poco::Logger * log_, + std::vector && block_id_, + BlockWithPartition && block_, + std::optional && unmerged_block_with_partition_); + + void initBlockIDMap(); + + /// this function check if the block contains duplicate inserts. + /// if so, we keep only one insert for every duplicate ones. + bool filterSelfDuplicate(); + + /// remove the conflict parts of block for rewriting again. + void filterBlockDuplicate(const std::vector & block_paths, bool self_dedup); + /// Convert block id vector to string. Output at most 50 ids. 
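    /// Each block id is either "<partition_id>_<token>" when an explicit deduplication token
    /// was supplied, or "<partition_id>_<lo>_<hi>", where lo/hi are the two 64-bit halves of a
    /// SipHash-128 over the rows of one offset-chunk.
    /// A minimal sketch of the self-deduplication above, assuming plain std types instead of
    /// ClickHouse columns (`keep` plays the role of the UInt8 filter column; first occurrence
    /// of a duplicated id is kept, later chunks are dropped):
    ///
    ///     std::vector<uint8_t> keep(rows, 1);
    ///     std::unordered_set<std::string> seen;
    ///     size_t start = 0;
    ///     for (size_t i = 0; i < offsets.size(); ++i)
    ///     {
    ///         if (!seen.insert(block_id[i]).second)  /// duplicate id: drop the whole chunk
    ///             std::fill(keep.begin() + start, keep.begin() + offsets[i], 0);
    ///         start = offsets[i];
    ///     }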
+ + static std::vector getHashesForBlocks(BlockWithPartition & block, String partition_id); +}; + +} diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 3f02a6b197e..42731bac19b 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -764,7 +764,9 @@ KeyCondition::KeyCondition( ++key_index; } - auto filter_node = buildFilterNode(query, additional_filter_asts); + ASTPtr filter_node; + if (query) + filter_node = buildFilterNode(query, additional_filter_asts); if (!filter_node) { diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index f694ecab8e3..2e48892563b 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -36,7 +36,7 @@ void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String p if (code == Coordination::Error::ZNONODE) return; else if (code != Coordination::Error::ZOK) - throw KeeperException(code, path); + throw KeeperException::fromPath(code, path); Coordination::Requests ops; diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index c6c197919f4..bd8546f04cc 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -83,12 +83,12 @@ size_t MarkRanges::getNumberOfMarks() const void MarkRanges::serialize(WriteBuffer & out) const { - writeIntBinary(this->size(), out); + writeBinaryLittleEndian(this->size(), out); for (const auto & [begin, end] : *this) { - writeIntBinary(begin, out); - writeIntBinary(end, out); + writeBinaryLittleEndian(begin, out); + writeBinaryLittleEndian(end, out); } } @@ -100,13 +100,13 @@ String MarkRanges::describe() const void MarkRanges::deserialize(ReadBuffer & in) { size_t size = 0; - readIntBinary(size, in); + readBinaryLittleEndian(size, in); this->resize(size); for (size_t i = 0; i < size; ++i) { - readIntBinary((*this)[i].begin, in); - readIntBinary((*this)[i].end, in); + readBinaryLittleEndian((*this)[i].begin, in); + readBinaryLittleEndian((*this)[i].end, in); } } diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 1cd2d11da50..8fbb163384e 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -1,20 +1,16 @@ +#include #include #include -#include +#include #include #include #include -#include namespace DB { - -MergeListElement::MergeListElement( - const StorageID & table_id_, - FutureMergedMutatedPartPtr future_part, - const ContextPtr & context) +MergeListElement::MergeListElement(const StorageID & table_id_, FutureMergedMutatedPartPtr future_part, const ContextPtr & context) : table_id{table_id_} , partition_id{future_part->part_info.partition_id} , result_part_name{future_part->name} @@ -40,6 +36,10 @@ MergeListElement::MergeListElement( { source_data_version = future_part->parts[0]->info.getDataVersion(); is_mutation = (result_part_info.getDataVersion() != source_data_version); + + WriteBufferFromString out(partition); + const auto & part = future_part->parts[0]; + part->partition.serializeText(part->storage, out, {}); } thread_group = ThreadGroup::createForBackgroundProcess(context); @@ -53,6 +53,7 @@ MergeInfo MergeListElement::getInfo() const res.result_part_name = result_part_name; res.result_part_path = result_part_path; res.partition_id = partition_id; + res.partition = partition; res.is_mutation = is_mutation; res.elapsed = watch.elapsedSeconds(); res.progress = 
progress.load(std::memory_order_relaxed); diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index ffa87e75505..d40af6abf43 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -35,6 +35,7 @@ struct MergeInfo Array source_part_names; Array source_part_paths; std::string partition_id; + std::string partition; bool is_mutation; Float64 elapsed; Float64 progress; @@ -67,6 +68,7 @@ struct MergeListElement : boost::noncopyable { const StorageID table_id; std::string partition_id; + std::string partition; const std::string result_part_name; const std::string result_part_path; diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 3f5753a0c95..c218acce903 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -40,7 +40,6 @@ bool MergePlainMergeTreeTask::executeStep() if (merge_list_entry) { switcher.emplace((*merge_list_entry)->thread_group); - } switch (state) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 8f39c31eae0..df607d36402 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -487,6 +487,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read; size_t input_rows_filtered = *global_ctx->input_rows_filtered; + size_t cleanedup_rows_count = global_ctx->cleanedup_rows_count; global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); @@ -499,12 +500,13 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// In special case, when there is only one source part, and no rows were skipped, we may have /// skipped writing rows_sources file. Otherwise rows_sources_count must be equal to the total /// number of input rows. - if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) && sum_input_rows_exact != rows_sources_count + input_rows_filtered) + if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) + && sum_input_rows_exact != rows_sources_count + input_rows_filtered + cleanedup_rows_count) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " - "of bytes written to rows_sources file ({}). It is a bug.", - sum_input_rows_exact, input_rows_filtered, rows_sources_count); + ErrorCodes::LOGICAL_ERROR, + "Number of rows in source parts ({}) excluding filtered rows ({}) and cleaned up rows ({}) differs from number " + "of bytes written to rows_sources file ({}). 
It is a bug.", + sum_input_rows_exact, input_rows_filtered, cleanedup_rows_count, rows_sources_count); ctx->rows_sources_read_buf = std::make_unique(ctx->tmp_disk->readFile(fileName(ctx->rows_sources_file->path()))); @@ -975,7 +977,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() merged_transform = std::make_shared( header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column, merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size, - (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup); + (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup, &global_ctx->cleanedup_rows_count); break; case MergeTreeData::MergingParams::Graphite: diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 9b777345c1d..402d3c26e49 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -145,6 +145,7 @@ private: bool deduplicate{false}; Names deduplicate_by_columns{}; bool cleanup{false}; + size_t cleanedup_rows_count{0}; NamesAndTypesList gathering_columns{}; NamesAndTypesList merging_columns{}; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp deleted file mode 100644 index 48adf36e678..00000000000 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ /dev/null @@ -1,706 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ - extern const Event WaitPrefetchTaskMicroseconds; -}; - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; - extern const int LOGICAL_ERROR; - extern const int QUERY_WAS_CANCELLED; -} - -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns); - -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns); - - -IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm( - Block header, - const MergeTreeData & storage_, - const StorageSnapshotPtr & storage_snapshot_, - const PrewhereInfoPtr & prewhere_info_, - const ExpressionActionsSettings & actions_settings_, - UInt64 max_block_size_rows_, - UInt64 preferred_block_size_bytes_, - UInt64 preferred_max_column_in_block_size_bytes_, - const MergeTreeReaderSettings & reader_settings_, - bool use_uncompressed_cache_, - const Names & virt_column_names_) - : storage(storage_) - , storage_snapshot(storage_snapshot_) - , prewhere_info(prewhere_info_) - , actions_settings(actions_settings_) - , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) - , max_block_size_rows(max_block_size_rows_) - , preferred_block_size_bytes(preferred_block_size_bytes_) - , preferred_max_column_in_block_size_bytes(preferred_max_column_in_block_size_bytes_) - , reader_settings(reader_settings_) - , use_uncompressed_cache(use_uncompressed_cache_) - , virt_column_names(virt_column_names_) - , partition_value_type(storage.getPartitionValueType()) - , owned_uncompressed_cache(use_uncompressed_cache ? 
storage.getContext()->getUncompressedCache() : nullptr) - , owned_mark_cache(storage.getContext()->getMarkCache()) -{ - header_without_const_virtual_columns = applyPrewhereActions(std::move(header), prewhere_info); - size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); - injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - - for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) - non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); - - result_header = header_without_const_virtual_columns; - injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); - - if (!prewhere_actions.steps.empty()) - LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions()); - - if (prewhere_info) - LOG_TEST(log, "Original PREWHERE DAG:\n{}\nPREWHERE actions:\n{}", - (prewhere_info->prewhere_actions ? prewhere_info->prewhere_actions->dumpDAG(): std::string("")), - (!prewhere_actions.steps.empty() ? prewhere_actions.dump() : std::string(""))); -} - -bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere); - -PrewhereExprInfo IMergeTreeSelectAlgorithm::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps) -{ - PrewhereExprInfo prewhere_actions; - if (prewhere_info) - { - if (prewhere_info->row_level_filter) - { - PrewhereExprStep row_level_filter_step - { - .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->row_level_filter, actions_settings), - .filter_column_name = prewhere_info->row_level_column_name, - .remove_filter_column = true, - .need_filter = true, - .perform_alter_conversions = true, - }; - - prewhere_actions.steps.emplace_back(std::make_shared(std::move(row_level_filter_step))); - } - - if (!enable_multiple_prewhere_read_steps || - !tryBuildPrewhereSteps(prewhere_info, actions_settings, prewhere_actions)) - { - PrewhereExprStep prewhere_step - { - .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings), - .filter_column_name = prewhere_info->prewhere_column_name, - .remove_filter_column = prewhere_info->remove_prewhere_column, - .need_filter = prewhere_info->need_filter, - .perform_alter_conversions = true, - }; - - prewhere_actions.steps.emplace_back(std::make_shared(std::move(prewhere_step))); - } - } - - return prewhere_actions; -} - - -bool IMergeTreeSelectAlgorithm::getNewTask() -{ - if (getNewTaskImpl()) - { - finalizeNewTask(); - return true; - } - return false; -} - - -ChunkAndProgress IMergeTreeSelectAlgorithm::read() -{ - size_t num_read_rows = 0; - size_t num_read_bytes = 0; - - while (!is_cancelled) - { - try - { - if ((!task || task->isFinished()) && !getNewTask()) - break; - } - catch (const Exception & e) - { - /// See MergeTreeBaseSelectProcessor::getTaskFromBuffer() - if (e.code() == ErrorCodes::QUERY_WAS_CANCELLED) - break; - throw; - } - - auto res = readFromPart(); - - if (res.row_count) - { - injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names); - - /// Reorder the columns according to result_header - Columns ordered_columns; - ordered_columns.reserve(result_header.columns()); - for 
(size_t i = 0; i < result_header.columns(); ++i) - { - auto name = result_header.getByPosition(i).name; - ordered_columns.push_back(res.block.getByName(name).column); - } - - /// Account a progress from previous empty chunks. - res.num_read_rows += num_read_rows; - res.num_read_bytes += num_read_bytes; - - return ChunkAndProgress{ - .chunk = Chunk(ordered_columns, res.row_count), - .num_read_rows = res.num_read_rows, - .num_read_bytes = res.num_read_bytes, - .is_finished = false}; - } - else - { - return {Chunk(), res.num_read_rows, res.num_read_bytes, false}; - } - } - - return {Chunk(), num_read_rows, num_read_bytes, true}; -} - -void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( - const StorageMetadataPtr & metadata_snapshot, - const IMergeTreeReader::ValueSizeMap & value_size_map, - const ReadBufferFromFileBase::ProfileCallback & profile_callback) -{ - if (!task) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no task"); - - if (task->reader.valid()) - { - ProfileEventTimeIncrement watch(ProfileEvents::WaitPrefetchTaskMicroseconds); - reader = task->reader.get(); - } - else - { - reader = task->data_part->getReader( - task->task_columns.columns, metadata_snapshot, task->mark_ranges, - owned_uncompressed_cache.get(), owned_mark_cache.get(), - task->alter_conversions, reader_settings, value_size_map, profile_callback); - } - - if (!task->pre_reader_for_step.empty()) - { - ProfileEventTimeIncrement watch(ProfileEvents::WaitPrefetchTaskMicroseconds); - pre_reader_for_step.clear(); - for (auto & pre_reader : task->pre_reader_for_step) - pre_reader_for_step.push_back(pre_reader.get()); - } - else - { - initializeMergeTreePreReadersForPart( - task->data_part, task->alter_conversions, - task->task_columns, metadata_snapshot, - task->mark_ranges, value_size_map, profile_callback); - } -} - -void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForPart( - const MergeTreeData::DataPartPtr & data_part, - const AlterConversionsPtr & alter_conversions, - const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, - const MarkRanges & mark_ranges, - const IMergeTreeReader::ValueSizeMap & value_size_map, - const ReadBufferFromFileBase::ProfileCallback & profile_callback) -{ - reader = data_part->getReader( - task_columns.columns, metadata_snapshot, mark_ranges, - owned_uncompressed_cache.get(), owned_mark_cache.get(), - alter_conversions, reader_settings, value_size_map, profile_callback); - - initializeMergeTreePreReadersForPart( - data_part, alter_conversions, task_columns, metadata_snapshot, - mark_ranges, value_size_map, profile_callback); -} - -void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( - const MergeTreeData::DataPartPtr & data_part, - const AlterConversionsPtr & alter_conversions, - const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, - const MarkRanges & mark_ranges, - const IMergeTreeReader::ValueSizeMap & value_size_map, - const ReadBufferFromFileBase::ProfileCallback & profile_callback) -{ - pre_reader_for_step.clear(); - - /// Add lightweight delete filtering step - if (reader_settings.apply_deleted_mask && data_part->hasLightweightDelete()) - { - pre_reader_for_step.push_back( - data_part->getReader( - {LightweightDeleteDescription::FILTER_COLUMN}, metadata_snapshot, - mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), - alter_conversions, reader_settings, value_size_map, profile_callback)); - } - - for (const auto & 
pre_columns_per_step : task_columns.pre_columns) - { - pre_reader_for_step.push_back( - data_part->getReader( - pre_columns_per_step, metadata_snapshot, mark_ranges, - owned_uncompressed_cache.get(), owned_mark_cache.get(), - alter_conversions, reader_settings, value_size_map, profile_callback)); - } -} - -void IMergeTreeSelectAlgorithm::initializeRangeReaders(MergeTreeReadTask & current_task) -{ - return initializeRangeReadersImpl( - current_task.range_reader, current_task.pre_range_readers, prewhere_actions, - reader.get(), current_task.data_part->hasLightweightDelete(), reader_settings, - pre_reader_for_step, lightweight_delete_filter_step, non_const_virtual_column_names); -} - -void IMergeTreeSelectAlgorithm::initializeRangeReadersImpl( - MergeTreeRangeReader & range_reader, - std::deque & pre_range_readers, - const PrewhereExprInfo & prewhere_actions, - IMergeTreeReader * reader, - bool has_lightweight_delete, - const MergeTreeReaderSettings & reader_settings, - const std::vector> & pre_reader_for_step, - const PrewhereExprStep & lightweight_delete_filter_step, - const Names & non_const_virtual_column_names) -{ - MergeTreeRangeReader * prev_reader = nullptr; - bool last_reader = false; - size_t pre_readers_shift = 0; - - /// Add filtering step with lightweight delete mask - if (reader_settings.apply_deleted_mask && has_lightweight_delete) - { - MergeTreeRangeReader pre_range_reader(pre_reader_for_step[0].get(), prev_reader, &lightweight_delete_filter_step, last_reader, non_const_virtual_column_names); - pre_range_readers.push_back(std::move(pre_range_reader)); - prev_reader = &pre_range_readers.back(); - pre_readers_shift++; - } - - if (prewhere_actions.steps.size() + pre_readers_shift != pre_reader_for_step.size()) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "PREWHERE steps count mismatch, actions: {}, readers: {}", - prewhere_actions.steps.size(), pre_reader_for_step.size()); - } - - for (size_t i = 0; i < prewhere_actions.steps.size(); ++i) - { - last_reader = reader->getColumns().empty() && (i + 1 == prewhere_actions.steps.size()); - - MergeTreeRangeReader current_reader( - pre_reader_for_step[i + pre_readers_shift].get(), - prev_reader, prewhere_actions.steps[i].get(), - last_reader, non_const_virtual_column_names); - - pre_range_readers.push_back(std::move(current_reader)); - prev_reader = &pre_range_readers.back(); - } - - if (!last_reader) - { - range_reader = MergeTreeRangeReader(reader, prev_reader, nullptr, true, non_const_virtual_column_names); - } - else - { - /// If all columns are read by pre_range_readers than move last pre_range_reader into range_reader - range_reader = std::move(pre_range_readers.back()); - pre_range_readers.pop_back(); - } -} - -static UInt64 estimateNumRows(const MergeTreeReadTask & current_task, UInt64 current_preferred_block_size_bytes, - UInt64 current_max_block_size_rows, UInt64 current_preferred_max_column_in_block_size_bytes, double min_filtration_ratio, size_t min_marks_to_read) -{ - const MergeTreeRangeReader & current_reader = current_task.range_reader; - - if (!current_task.size_predictor) - return static_cast(current_max_block_size_rows); - - /// Calculates number of rows will be read using preferred_block_size_bytes. - /// Can't be less than avg_index_granularity. 
- size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes); - if (!rows_to_read) - return rows_to_read; - auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule(); - rows_to_read = std::max(total_row_in_current_granule, rows_to_read); - - if (current_preferred_max_column_in_block_size_bytes) - { - /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. - auto rows_to_read_for_max_size_column - = current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes); - double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio); - auto rows_to_read_for_max_size_column_with_filtration - = static_cast(rows_to_read_for_max_size_column / filtration_ratio); - - /// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity. - rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration); - } - - auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule(); - if (unread_rows_in_current_granule >= rows_to_read) - return rows_to_read; - - const MergeTreeIndexGranularity & index_granularity = current_task.data_part->index_granularity; - - return index_granularity.countMarksForRows(current_reader.currentMark(), rows_to_read, current_reader.numReadRowsInCurrentGranule(), min_marks_to_read); -} - - -IMergeTreeSelectAlgorithm::BlockAndProgress IMergeTreeSelectAlgorithm::readFromPartImpl() -{ - if (task->size_predictor) - task->size_predictor->startBlock(); - - const UInt64 current_max_block_size_rows = max_block_size_rows; - const UInt64 current_preferred_block_size_bytes = preferred_block_size_bytes; - const UInt64 current_preferred_max_column_in_block_size_bytes = preferred_max_column_in_block_size_bytes; - const double min_filtration_ratio = 0.00001; - - UInt64 recommended_rows = estimateNumRows(*task, current_preferred_block_size_bytes, - current_max_block_size_rows, current_preferred_max_column_in_block_size_bytes, min_filtration_ratio, min_marks_to_read); - UInt64 rows_to_read = std::max(static_cast(1), std::min(current_max_block_size_rows, recommended_rows)); - - auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); - - /// All rows were filtered. Repeat. - if (read_result.num_rows == 0) - read_result.columns.clear(); - - const auto & sample_block = task->range_reader.getSampleBlock(); - if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent number of columns got from MergeTreeRangeReader. " - "Have {} in sample block and {} columns in list", - toString(sample_block.columns()), toString(read_result.columns.size())); - - /// TODO: check columns have the same types as in header. 
- - UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows; - - size_t num_read_rows = read_result.numReadRows(); - size_t num_read_bytes = read_result.numBytesRead(); - - if (task->size_predictor) - { - task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows); - - if (!read_result.columns.empty()) - task->size_predictor->update(sample_block, read_result.columns, read_result.num_rows); - } - - Block block; - if (read_result.num_rows != 0) - block = sample_block.cloneWithColumns(read_result.columns); - - BlockAndProgress res = { - .block = std::move(block), - .row_count = read_result.num_rows, - .num_read_rows = num_read_rows, - .num_read_bytes = num_read_bytes }; - - return res; -} - - -IMergeTreeSelectAlgorithm::BlockAndProgress IMergeTreeSelectAlgorithm::readFromPart() -{ - if (!task->range_reader.isInitialized()) - initializeRangeReaders(*task); - - return readFromPartImpl(); -} - - -namespace -{ - struct VirtualColumnsInserter - { - explicit VirtualColumnsInserter(Block & block_) : block(block_) {} - - bool columnExists(const String & name) const { return block.has(name); } - - void insertUInt8Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUInt64Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUUIDColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertLowCardinalityColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(std::make_shared()), name}); - } - - void insertPartitionValueColumn( - size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name) - { - ColumnPtr column; - if (rows) - column = partition_value_type->createColumnConst(rows, Tuple(partition_value.begin(), partition_value.end())) - ->convertToFullColumnIfConst(); - else - column = partition_value_type->createColumn(); - - block.insert({column, partition_value_type, name}); - } - - Block & block; - }; -} - -/// Adds virtual columns that are not const for all rows -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns) -{ - VirtualColumnsInserter inserter(block); - for (const auto & virtual_column_name : virtual_columns) - { - if (virtual_column_name == "_part_offset") - { - if (!rows) - { - inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); - } - else - { - if (!inserter.columnExists(virtual_column_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column {} must have been filled part reader", - virtual_column_name); - } - } - - if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - /// If _row_exists column isn't present in the part then fill it here with 1s - ColumnPtr column; - if (rows) - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); - else - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); - - inserter.insertUInt8Column(column, virtual_column_name); - } - } -} - -/// Adds virtual columns that are const for the whole part -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns) -{ - VirtualColumnsInserter 
inserter(block); - /// add virtual columns - /// Except _sample_factor, which is added from the outside. - if (!virtual_columns.empty()) - { - if (unlikely(rows && !task)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); - - const IMergeTreeDataPart * part = nullptr; - if (rows) - { - part = task->data_part.get(); - if (part->isProjectionPart()) - part = part->getParentPart(); - } - for (const auto & virtual_column_name : virtual_columns) - { - if (virtual_column_name == "_part") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->name) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_part_index") - { - ColumnPtr column; - if (rows) - column = DataTypeUInt64().createColumnConst(rows, task->part_index_in_query)->convertToFullColumnIfConst(); - else - column = DataTypeUInt64().createColumn(); - - inserter.insertUInt64Column(column, virtual_column_name); - } - else if (virtual_column_name == "_part_uuid") - { - ColumnPtr column; - if (rows) - column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); - else - column = DataTypeUUID().createColumn(); - - inserter.insertUUIDColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_id") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->info.partition_id) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_value") - { - if (rows) - inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); - else - inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); - } - } - } -} - -void IMergeTreeSelectAlgorithm::injectVirtualColumns( - Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves. 
- /// Note that the order is important: virtual columns filled by the range reader must go first - injectNonConstVirtualColumns(row_count, block, virtual_columns); - injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns); -} - -Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) -{ - if (prewhere_info) - { - if (prewhere_info->row_level_filter) - { - block = prewhere_info->row_level_filter->updateHeader(std::move(block)); - auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); - if (!row_level_column.type->canBeUsedInBooleanContext()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", - row_level_column.type->getName()); - } - - block.erase(prewhere_info->row_level_column_name); - } - - if (prewhere_info->prewhere_actions) - { - block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); - - auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); - if (!prewhere_column.type->canBeUsedInBooleanContext()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", - prewhere_column.type->getName()); - } - - if (prewhere_info->remove_prewhere_column) - { - block.erase(prewhere_info->prewhere_column_name); - } - else if (prewhere_info->need_filter) - { - WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); - - if (which.isNativeInt() || which.isNativeUInt()) - prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); - else if (which.isFloat()) - prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, - "Illegal type {} of column for filter", - prewhere_column.type->getName()); - } - } - } - - return block; -} - -Block IMergeTreeSelectAlgorithm::transformHeader( - Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - auto transformed = applyPrewhereActions(std::move(block), prewhere_info); - injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns); - return transformed; -} - -std::unique_ptr IMergeTreeSelectAlgorithm::getSizePredictor( - const MergeTreeData::DataPartPtr & data_part, - const MergeTreeReadTaskColumns & task_columns, - const Block & sample_block) -{ - const auto & required_column_names = task_columns.columns.getNames(); - NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); - for (const auto & pre_columns_per_step : task_columns.pre_columns) - { - const auto & required_pre_column_names = pre_columns_per_step.getNames(); - complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); - } - - return std::make_unique( - data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block); -} - - -IMergeTreeSelectAlgorithm::~IMergeTreeSelectAlgorithm() = default; - -} diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h deleted file mode 100644 index 7b6dc50060a..00000000000 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ /dev/null @@ -1,220 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include 
-#include - - -namespace DB -{ - -class IMergeTreeReader; -class UncompressedCache; -class MarkCache; -struct PrewhereExprInfo; - -struct ChunkAndProgress -{ - Chunk chunk; - size_t num_read_rows = 0; - size_t num_read_bytes = 0; - /// Explicitly indicate that we have read all data. - /// This is needed to occasionally return empty chunk to indicate the progress while the rows are filtered out in PREWHERE. - bool is_finished = false; -}; - -struct ParallelReadingExtension -{ - MergeTreeAllRangesCallback all_callback; - MergeTreeReadTaskCallback callback; - size_t count_participating_replicas{0}; - size_t number_of_current_replica{0}; - /// This is needed to estimate the number of bytes - /// between a pair of marks to perform one request - /// over the network for a 1Gb of data. - Names columns_to_read; -}; - -/// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm -class IMergeTreeSelectAlgorithm -{ -public: - IMergeTreeSelectAlgorithm( - Block header, - const MergeTreeData & storage_, - const StorageSnapshotPtr & storage_snapshot_, - const PrewhereInfoPtr & prewhere_info_, - const ExpressionActionsSettings & actions_settings, - UInt64 max_block_size_rows_, - UInt64 preferred_block_size_bytes_, - UInt64 preferred_max_column_in_block_size_bytes_, - const MergeTreeReaderSettings & reader_settings_, - bool use_uncompressed_cache_, - const Names & virt_column_names_ = {}); - - virtual ~IMergeTreeSelectAlgorithm(); - - static Block transformHeader( - Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns); - - static std::unique_ptr getSizePredictor( - const MergeTreeData::DataPartPtr & data_part, - const MergeTreeReadTaskColumns & task_columns, - const Block & sample_block); - - Block getHeader() const { return result_header; } - - ChunkAndProgress read(); - - void cancel() { is_cancelled = true; } - - const MergeTreeReaderSettings & getSettings() const { return reader_settings; } - - virtual std::string getName() const = 0; - - static PrewhereExprInfo getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps); - -protected: - /// This struct allow to return block with no columns but with non-zero number of rows similar to Chunk - struct BlockAndProgress - { - Block block; - size_t row_count = 0; - size_t num_read_rows = 0; - size_t num_read_bytes = 0; - }; - - /// Creates new this->task and return a flag whether it was successful or not - virtual bool getNewTaskImpl() = 0; - /// Creates new readers for a task it is needed. These methods are separate, because - /// in case of parallel reading from replicas the whole task could be denied by a coodinator - /// or it could modified somehow. 
- virtual void finalizeNewTask() = 0; - - size_t estimateMaxBatchSizeForHugeRanges(); - - /// Closes readers and unlock part locks - virtual void finish() = 0; - - virtual BlockAndProgress readFromPart(); - - BlockAndProgress readFromPartImpl(); - - /// Used for filling header with no rows as well as block with data - static void - injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns); - -protected: - static void initializeRangeReadersImpl( - MergeTreeRangeReader & range_reader, - std::deque & pre_range_readers, - const PrewhereExprInfo & prewhere_actions, - IMergeTreeReader * reader, - bool has_lightweight_delete, - const MergeTreeReaderSettings & reader_settings, - const std::vector> & pre_reader_for_step, - const PrewhereExprStep & lightweight_delete_filter_step, - const Names & non_const_virtual_column_names); - - /// Sets up data readers for each step of prewhere and where - void initializeMergeTreeReadersForCurrentTask( - const StorageMetadataPtr & metadata_snapshot, - const IMergeTreeReader::ValueSizeMap & value_size_map, - const ReadBufferFromFileBase::ProfileCallback & profile_callback); - - void initializeMergeTreeReadersForPart( - const MergeTreeData::DataPartPtr & data_part, - const AlterConversionsPtr & alter_conversions, - const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, - const MarkRanges & mark_ranges, - const IMergeTreeReader::ValueSizeMap & value_size_map, - const ReadBufferFromFileBase::ProfileCallback & profile_callback); - - /// Sets up range readers corresponding to data readers - void initializeRangeReaders(MergeTreeReadTask & task); - - const MergeTreeData & storage; - StorageSnapshotPtr storage_snapshot; - - /// This step is added when the part has lightweight delete mask - const PrewhereExprStep lightweight_delete_filter_step - { - .type = PrewhereExprStep::Filter, - .actions = nullptr, - .filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name, - .remove_filter_column = true, - .need_filter = true, - .perform_alter_conversions = true, - }; - - PrewhereInfoPtr prewhere_info; - ExpressionActionsSettings actions_settings; - PrewhereExprInfo prewhere_actions; - - UInt64 max_block_size_rows; - UInt64 preferred_block_size_bytes; - UInt64 preferred_max_column_in_block_size_bytes; - - MergeTreeReaderSettings reader_settings; - - bool use_uncompressed_cache; - - Names virt_column_names; - - /// These columns will be filled by the merge tree range reader - Names non_const_virtual_column_names; - - DataTypePtr partition_value_type; - - /// This header is used for chunks from readFromPart(). - Block header_without_const_virtual_columns; - /// A result of getHeader(). A chunk which this header is returned from read(). - Block result_header; - - UncompressedCachePtr owned_uncompressed_cache; - MarkCachePtr owned_mark_cache; - - using MergeTreeReaderPtr = std::unique_ptr; - MergeTreeReaderPtr reader; - std::vector pre_reader_for_step; - - MergeTreeReadTaskPtr task; - - /// This setting is used in base algorithm only to additionally limit the number of granules to read. - /// It is changed in ctor of MergeTreeThreadSelectAlgorithm. - /// - /// The reason why we have it here is because MergeTreeReadPool takes the full task - /// ignoring min_marks_to_read setting in case of remote disk (see MergeTreeReadPool::getTask). - /// In this case, we won't limit the number of rows to read based on adaptive granularity settings. 
- /// - /// Big reading tasks are better for remote disk and prefetches. - /// So, for now it's easier to limit max_rows_to_read. - /// Somebody need to refactor this later. - size_t min_marks_to_read = 0; - -private: - Poco::Logger * log = &Poco::Logger::get("MergeTreeBaseSelectProcessor"); - - std::atomic is_cancelled{false}; - - bool getNewTask(); - - /// Initialize pre readers. - void initializeMergeTreePreReadersForPart( - const MergeTreeData::DataPartPtr & data_part, - const AlterConversionsPtr & alter_conversions, - const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, - const MarkRanges & mark_ranges, - const IMergeTreeReader::ValueSizeMap & value_size_map, - const ReadBufferFromFileBase::ProfileCallback & profile_callback); - - static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); -}; - -using MergeTreeSelectAlgorithmPtr = std::unique_ptr; - -} diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index d830ba37e71..f5f0fa6f726 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -16,6 +16,7 @@ namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -136,43 +137,6 @@ NameSet injectRequiredColumns( return injected_columns; } - -MergeTreeReadTask::MergeTreeReadTask( - const DataPartPtr & data_part_, - const AlterConversionsPtr & alter_conversions_, - const MarkRanges & mark_ranges_, - size_t part_index_in_query_, - const NameSet & column_name_set_, - const MergeTreeReadTaskColumns & task_columns_, - MergeTreeBlockSizePredictorPtr size_predictor_, - Priority priority_, - std::future reader_, - std::vector> && pre_reader_for_step_) - : data_part{data_part_} - , alter_conversions{alter_conversions_} - , mark_ranges{mark_ranges_} - , part_index_in_query{part_index_in_query_} - , column_name_set{column_name_set_} - , task_columns{task_columns_} - , size_predictor{size_predictor_} - , reader(std::move(reader_)) - , pre_reader_for_step(std::move(pre_reader_for_step_)) - , priority(priority_) -{ -} - -MergeTreeReadTask::~MergeTreeReadTask() -{ - if (reader.valid()) - reader.wait(); - - for (const auto & pre_reader : pre_reader_for_step) - { - if (pre_reader.valid()) - pre_reader.wait(); - } -} - MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor( const DataPartPtr & data_part_, const Names & columns, const Block & sample_block) : data_part(data_part_) @@ -195,9 +159,8 @@ void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const C for (size_t pos = 0; pos < num_columns; ++pos) { const auto & column_with_type_and_name = sample_block.getByPosition(pos); - const String & column_name = column_with_type_and_name.name; - const ColumnPtr & column_data = from_update ? columns[pos] - : column_with_type_and_name.column; + const auto & column_name = column_with_type_and_name.name; + const auto & column_data = from_update ? 
columns[pos] : column_with_type_and_name.column; if (!from_update && !names_set.contains(column_name)) continue; @@ -246,7 +209,6 @@ void MergeTreeBlockSizePredictor::startBlock() info.size_bytes = 0; } - /// TODO: add last_read_row_in_part parameter to take into account gaps between adjacent ranges void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay) { @@ -296,7 +258,7 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum } -MergeTreeReadTaskColumns getReadTaskColumns( +MergeTreeReadTask::Columns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, @@ -317,7 +279,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( injectRequiredColumns( data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere); - MergeTreeReadTaskColumns result; + MergeTreeReadTask::Columns result; auto options = GetColumnsOptions(GetColumnsOptions::All) .withExtendedObjects() .withSystemColumns(); @@ -340,8 +302,10 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (!columns_from_previous_steps.contains(name)) step_column_names.push_back(name); - injectRequiredColumns( - data_part_info_for_reader, storage_snapshot, with_subcolumns, step_column_names); + if (!step_column_names.empty()) + injectRequiredColumns( + data_part_info_for_reader, storage_snapshot, + with_subcolumns, step_column_names); /// More columns could have been added, filter them as well by the list of columns from previous steps. Names columns_to_read_in_step; @@ -363,7 +327,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (prewhere_info) { - auto prewhere_actions = IMergeTreeSelectAlgorithm::getPrewhereActions( + auto prewhere_actions = MergeTreeSelectProcessor::getPrewhereActions( prewhere_info, actions_settings, reader_settings.enable_multiple_prewhere_read_steps); @@ -385,16 +349,4 @@ MergeTreeReadTaskColumns getReadTaskColumns( return result; } - -std::string MergeTreeReadTaskColumns::dump() const -{ - WriteBufferFromOwnString s; - for (size_t i = 0; i < pre_columns.size(); ++i) - { - s << "STEP " << i << ": " << pre_columns[i].toString() << "\n"; - } - s << "COLUMNS: " << columns.toString() << "\n"; - return s.str(); -} - } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 5df3e600a1a..9417d47814a 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -2,29 +2,15 @@ #include #include -#include -#include -#include -#include -#include +#include namespace DB { -class MergeTreeData; -struct MergeTreeReadTask; struct MergeTreeReaderSettings; -struct MergeTreeBlockSizePredictor; class IMergeTreeDataPartInfoForReader; -using MergeTreeReadTaskPtr = std::unique_ptr; -using MergeTreeBlockSizePredictorPtr = std::shared_ptr; - -class IMergeTreeDataPart; -using DataPartPtr = std::shared_ptr; - - /** If some of the requested columns are not in the part, * then find out which columns may need to be read further, * so that you can calculate the DEFAULT expression for these columns. 
@@ -36,64 +22,7 @@ NameSet injectRequiredColumns( bool with_subcolumns, Names & columns); -struct MergeTreeReadTaskColumns -{ - /// column names to read during WHERE - NamesAndTypesList columns; - /// column names to read during each PREWHERE step - std::vector pre_columns; - - std::string dump() const; -}; - -/// A batch of work for MergeTreeThreadSelectProcessor -struct MergeTreeReadTask -{ - /// Data part which should be read while performing this task - DataPartPtr data_part; - /// Alter converversionss that should be applied on-fly for part. - AlterConversionsPtr alter_conversions; - /// Ranges to read from `data_part`. - MarkRanges mark_ranges; - /// for virtual `part_index` virtual column - size_t part_index_in_query; - /// used to determine whether column should be filtered during PREWHERE or WHERE - const NameSet & column_name_set; - /// column names to read during PREWHERE and WHERE - const MergeTreeReadTaskColumns & task_columns; - /// Used to satistfy preferred_block_size_bytes limitation - MergeTreeBlockSizePredictorPtr size_predictor; - /// Used to save current range processing status - MergeTreeRangeReader range_reader; - /// Range readers for multiple filtering steps: row level security, PREWHERE etc. - /// NOTE: we take references to elements and push_back new elements, that's why it is a deque but not a vector - std::deque pre_range_readers; - - using MergeTreeReaderPtr = std::unique_ptr; - std::future reader; - std::vector> pre_reader_for_step; - - Priority priority; - - bool isFinished() const { return mark_ranges.empty() && range_reader.isCurrentRangeFinished(); } - - MergeTreeReadTask( - const DataPartPtr & data_part_, - const AlterConversionsPtr & alter_conversions_, - const MarkRanges & mark_ranges_, - size_t part_index_in_query_, - const NameSet & column_name_set_, - const MergeTreeReadTaskColumns & task_columns_, - MergeTreeBlockSizePredictorPtr size_predictor_, - Priority priority_ = {}, - std::future reader_ = {}, - std::vector> && pre_reader_for_step_ = {}); - - ~MergeTreeReadTask(); -}; - - -MergeTreeReadTaskColumns getReadTaskColumns( +MergeTreeReadTask::Columns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, @@ -119,7 +48,6 @@ struct MergeTreeBlockSizePredictor return block_size_bytes; } - /// Predicts what number of rows should be read to exhaust byte quota per column inline size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const { @@ -153,7 +81,6 @@ struct MergeTreeBlockSizePredictor static double calculateDecay() { return 1. - std::pow(TARGET_WEIGHT, 1. 
/ NUM_UPDATES_TO_TARGET_WEIGHT); } protected: - DataPartPtr data_part; struct ColumnInfo diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d773f380377..3337e136c16 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -68,7 +68,7 @@ #include #include #include -#include +#include #include #include #include @@ -103,6 +103,10 @@ #include #include +#if USE_AZURE_BLOB_STORAGE +#include +#endif + template <> struct fmt::formatter : fmt::formatter { @@ -128,6 +132,8 @@ namespace ProfileEvents extern const Event RejectedMutations; extern const Event DelayedMutations; extern const Event DelayedMutationsMilliseconds; + extern const Event PartsLockWaitMicroseconds; + extern const Event PartsLockHoldMicroseconds; } namespace CurrentMetrics @@ -187,8 +193,8 @@ static void checkSuspiciousIndices(const ASTFunction * index_function) std::unordered_set unique_index_expression_hashes; for (const auto & child : index_function->arguments->children) { - IAST::Hash hash = child->getTreeHash(); - UInt64 first_half_of_hash = hash.first; + const IAST::Hash hash = child->getTreeHash(); + const auto & first_half_of_hash = hash.low64; if (!unique_index_expression_hashes.emplace(first_half_of_hash).second) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -308,6 +314,20 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re throw Exception(ErrorCodes::METADATA_MISMATCH, "MergeTree data format version on disk doesn't support custom partitioning"); } } +DataPartsLock::DataPartsLock(std::mutex & data_parts_mutex_) + : wait_watch(Stopwatch(CLOCK_MONOTONIC)) + , lock(data_parts_mutex_) + , lock_watch(Stopwatch(CLOCK_MONOTONIC)) +{ + ProfileEvents::increment(ProfileEvents::PartsLockWaitMicroseconds, wait_watch->elapsedMicroseconds()); +} + + +DataPartsLock::~DataPartsLock() +{ + if (lock_watch.has_value()) + ProfileEvents::increment(ProfileEvents::PartsLockHoldMicroseconds, lock_watch->elapsedMicroseconds()); +} MergeTreeData::MergeTreeData( const StorageID & table_id_, @@ -334,7 +354,6 @@ MergeTreeData::MergeTreeData( , parts_mover(this) , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext()) , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext()) - , use_metadata_cache(getSettings()->use_metadata_cache) { context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); @@ -385,11 +404,6 @@ MergeTreeData::MergeTreeData( if (!canUsePolymorphicParts(*settings, reason) && !reason.empty()) LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part'and 'min_bytes_for_wide_part' will be ignored.", reason); -#if !USE_ROCKSDB - if (use_metadata_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't use merge tree metadata cache if clickhouse was compiled without rocksdb"); -#endif - common_assignee_trigger = [this] (bool delay) noexcept { if (delay) @@ -1217,10 +1231,10 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( auto part_size_str = res.size_of_part ? formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size"; LOG_ERROR(log, - "Detaching broken part {}{} (size: {}). " + "Detaching broken part {} (size: {}). " "If it happened after update, it is likely because of backward incompatibility. 
" "You need to resolve this manually", - getFullPathOnDisk(part_disk_ptr), part_name, part_size_str); + fs::path(getFullPathOnDisk(part_disk_ptr)) / part_name, part_size_str); }; try @@ -1230,26 +1244,13 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( .withPartFormatFromDisk() .build(); } - catch (const Exception & e) + catch (...) { /// Don't count the part as broken if there was a retryalbe error /// during loading, such as "not enough memory" or network error. - if (isRetryableException(e)) + if (isRetryableException(std::current_exception())) throw; - - mark_broken(); - return res; - } - catch (const Poco::Net::NetException &) - { - throw; - } - catch (const Poco::TimeoutException &) - { - throw; - } - catch (...) - { + LOG_DEBUG(log, "Failed to load data part {}, unknown exception", part_name); mark_broken(); return res; } @@ -1274,18 +1275,12 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( { res.part->loadColumnsChecksumsIndexes(require_part_metadata, true); } - catch (const Exception & e) + catch (...) { /// Don't count the part as broken if there was a retryalbe error /// during loading, such as "not enough memory" or network error. - if (isRetryableException(e)) + if (isRetryableException(std::current_exception())) throw; - - mark_broken(); - return res; - } - catch (...) - { mark_broken(); return res; } @@ -1396,22 +1391,47 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( size_t max_backoff_ms, size_t max_tries) { + auto handle_exception = [&, this](std::exception_ptr exception_ptr, size_t try_no) + { + if (try_no + 1 == max_tries) + throw; + + String exception_message; + try + { + rethrow_exception(exception_ptr); + } + catch (const Exception & e) + { + exception_message = e.message(); + } + #if USE_AZURE_BLOB_STORAGE + catch (const Azure::Core::RequestFailedException & e) + { + exception_message = e.Message; + } + #endif + + + LOG_DEBUG(log, "Failed to load data part {} at try {} with retryable error: {}. Will retry in {} ms", + part_name, try_no, exception_message, initial_backoff_ms); + + std::this_thread::sleep_for(std::chrono::milliseconds(initial_backoff_ms)); + initial_backoff_ms = std::min(initial_backoff_ms * 2, max_backoff_ms); + }; + for (size_t try_no = 0; try_no < max_tries; ++try_no) { try { return loadDataPart(part_info, part_name, part_disk_ptr, to_state, part_loading_mutex); } - catch (const Exception & e) + catch (...) { - if (!isRetryableException(e) || try_no + 1 == max_tries) + if (isRetryableException(std::current_exception())) + handle_exception(std::current_exception(),try_no); + else throw; - - LOG_DEBUG(log, "Failed to load data part {} at try {} with retryable error: {}. Will retry in {} ms", - part_name, try_no, e.message(), initial_backoff_ms); - - std::this_thread::sleep_for(std::chrono::milliseconds(initial_backoff_ms)); - initial_backoff_ms = std::min(initial_backoff_ms * 2, max_backoff_ms); } } UNREACHABLE(); @@ -2328,7 +2348,7 @@ size_t MergeTreeData::clearOldPartsFromFilesystem(bool force) removePartsFinally(parts_to_remove); /// This is needed to close files to avoid they reside on disk after being deleted. /// NOTE: we can drop files from cache more selectively but this is good enough. 
- getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); return parts_to_remove.size(); } @@ -2619,8 +2639,50 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (detached_parts.empty()) return 0; - PartsTemporaryRename renamed_parts(*this, "detached/"); + auto get_last_touched_time = [&](const DetachedPartInfo & part_info) -> time_t + { + auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; + time_t last_change_time = part_info.disk->getLastChanged(path); + time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); + return std::max(last_change_time, last_modification_time); + }; + time_t ttl_seconds = getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; + + size_t unfinished_deleting_parts = 0; + time_t current_time = time(nullptr); + for (const auto & part_info : detached_parts) + { + if (!part_info.dir_name.starts_with("deleting_")) + continue; + + time_t startup_time = current_time - static_cast(Context::getGlobalContextInstance()->getUptimeSeconds()); + time_t last_touch_time = get_last_touched_time(part_info); + + /// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED) + bool had_restart = last_touch_time < startup_time; + bool ttl_expired = last_touch_time + ttl_seconds <= current_time; + if (!had_restart && !ttl_expired) + continue; + + /// We were trying to delete this detached part but did not finish deleting, probably because the server crashed + LOG_INFO(log, "Removing detached part {} that we failed to remove previously", part_info.dir_name); + try + { + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / part_info.dir_name / "", part_info.dir_name); + ++unfinished_deleting_parts; + } + catch (...) 
+ { + tryLogCurrentException(log); + } + } + + if (!getSettings()->merge_tree_enable_clear_old_broken_detached) + return unfinished_deleting_parts; + + const auto full_path = fs::path(relative_data_path) / "detached"; + size_t removed_count = 0; for (const auto & part_info : detached_parts) { if (!part_info.valid_name || part_info.prefix.empty()) @@ -2635,31 +2697,24 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (!can_be_removed_by_timeout) continue; - time_t current_time = time(nullptr); - ssize_t threshold = current_time - getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; - auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; - time_t last_change_time = part_info.disk->getLastChanged(path); - time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); - time_t last_touch_time = std::max(last_change_time, last_modification_time); + ssize_t threshold = current_time - ttl_seconds; + time_t last_touch_time = get_last_touched_time(part_info); if (last_touch_time == 0 || last_touch_time >= threshold) continue; - renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk); - } + const String & old_name = part_info.dir_name; + String new_name = "deleting_" + part_info.dir_name; + part_info.disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name); - LOG_INFO(log, "Will clean up {} detached parts", renamed_parts.old_and_new_names.size()); - - renamed_parts.tryRenameAll(); - - for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) - { - removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); - old_name.clear(); + ++removed_count; } - return renamed_parts.old_and_new_names.size(); + LOG_INFO(log, "Cleaned up {} detached parts", removed_count); + + return removed_count + unfinished_deleting_parts; } size_t MergeTreeData::clearOldWriteAheadLogs() @@ -2799,7 +2854,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ } if (!getStorageID().hasUUID()) - getContext()->dropCaches(); + getContext()->clearCaches(); /// TODO: remove const_cast for (const auto & part : data_parts_by_info) @@ -2840,9 +2895,9 @@ void MergeTreeData::dropAllData() } /// Tables in atomic databases have UUID and stored in persistent locations. - /// No need to drop caches (that are keyed by filesystem path) because collision is not possible. + /// No need to clear caches (that are keyed by filesystem path) because collision is not possible. if (!getStorageID().hasUUID()) - getContext()->dropCaches(); + getContext()->clearCaches(); /// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls. 
NameSet part_names_failed; @@ -3166,6 +3221,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } + if (command.type == AlterCommand::MODIFY_QUERY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ALTER MODIFY QUERY is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -4031,7 +4090,7 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); DataPartsLock lock = lockParts(); part_to_detach->is_unexpected_local_part = true; @@ -4563,7 +4622,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy, DataPar /// Move parts are non replicated operations, so we take lock here. /// All other locks are taken in StorageReplicatedMergeTree - lockSharedData(*part_copy); + lockSharedData(*part_copy, /* replace_existing_lock */ true); return; } @@ -4779,17 +4838,18 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D log_subtract(total_column_size.marks, part_column_size.marks, ".marks"); } - auto indexes_descriptions = getInMemoryMetadataPtr()->secondary_indices; - for (const auto & index : indexes_descriptions) + for (auto & [secondary_index_name, total_secondary_index_size] : secondary_index_sizes) { - IndexSize & total_secondary_index_size = secondary_index_sizes[index.name]; - IndexSize part_secondary_index_size = part->getSecondaryIndexSize(index.name); + if (!part->hasSecondaryIndex(secondary_index_name)) + continue; + + IndexSize part_secondary_index_size = part->getSecondaryIndexSize(secondary_index_name); auto log_subtract = [&](size_t & from, size_t value, const char * field) { if (value > from) LOG_ERROR(log, "Possibly incorrect index size subtraction: {} - {} = {}, index: {}, field: {}", - from, value, from - value, index.name, field); + from, value, from - value, secondary_index_name, field); from -= value; }; @@ -4906,7 +4966,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on disk '{}'", partition_id, disk->getName()); } - MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast(disk)); + MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast(disk), local_context->getWriteSettings()); switch (moves_outcome) { case MovePartsOutcome::MovesAreCancelled: @@ -4969,7 +5029,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on volume '{}'", partition_id, volume->getName()); } - MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast(volume)); + MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast(volume), local_context->getWriteSettings()); switch (moves_outcome) { case MovePartsOutcome::MovesAreCancelled: @@ -5148,12 +5208,12 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( const 
DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, const ContextPtr & local_context) { MergeTreeData::PartsBackupEntries res; std::map> temp_dirs; TableLockHolder table_lock; - ReadSettings read_settings = local_context->getBackupReadSettings(); for (const auto & part : data_parts) { @@ -5181,12 +5241,16 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( if (hold_table_lock && !table_lock) table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + if (backup_settings.check_parts) + part->checkConsistencyWithProjections(/* require_part_metadata= */ true); + BackupEntries backup_entries_from_part; part->getDataPartStorage().backup( part->checksums, part->getFileNamesWithoutChecksums(), data_path_in_backup, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5199,6 +5263,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( projection_part->getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5239,8 +5304,8 @@ void MergeTreeData::restoreDataFromBackup(RestorerFromBackup & restorer, const S class MergeTreeData::RestoredPartsHolder { public: - RestoredPartsHolder(const std::shared_ptr & storage_, const BackupPtr & backup_, size_t num_parts_) - : storage(storage_), backup(backup_), num_parts(num_parts_) + RestoredPartsHolder(const std::shared_ptr & storage_, const BackupPtr & backup_) + : storage(storage_), backup(backup_) { } @@ -5253,6 +5318,13 @@ public: attachIfAllPartsRestored(); } + void increaseNumBrokenParts() + { + std::lock_guard lock{mutex}; + ++num_broken_parts; + attachIfAllPartsRestored(); + } + void addPart(MutableDataPartPtr part) { std::lock_guard lock{mutex}; @@ -5266,13 +5338,13 @@ public: auto it = temp_dirs.find(disk); if (it == temp_dirs.end()) it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/")).first; - return it->second->getPath(); + return it->second->getRelativePath(); } private: void attachIfAllPartsRestored() { - if (!num_parts || (parts.size() < num_parts)) + if (!num_parts || (parts.size() + num_broken_parts < num_parts)) return; /// Sort parts by min_block (because we need to preserve the order of parts). 
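Note: the RestoredPartsHolder change above makes the final attach wait until every scheduled part is either restored or counted as broken. A minimal sketch of that counting pattern, with invented names (the real holder also manages the backup handle and per-disk temporary directories):

```
#include <cstddef>
#include <functional>
#include <mutex>

/// Toy version of the "attach once all parts are accounted for" logic.
class RestoreCompletionTracker
{
public:
    RestoreCompletionTracker(size_t expected_parts_, std::function<void()> on_complete_)
        : expected_parts(expected_parts_), on_complete(std::move(on_complete_)) {}

    void addRestoredPart()
    {
        std::lock_guard lock{mutex};
        ++restored_parts;
        finishIfDone();
    }

    void addBrokenPart()
    {
        std::lock_guard lock{mutex};
        ++broken_parts;
        finishIfDone();
    }

private:
    void finishIfDone()
    {
        /// Fire only when every scheduled part is either restored or broken.
        if (expected_parts && restored_parts + broken_parts >= expected_parts)
        {
            on_complete();
            expected_parts = 0; /// make the callback fire at most once
        }
    }

    size_t expected_parts;
    size_t restored_parts = 0;
    size_t broken_parts = 0;
    std::function<void()> on_complete;
    std::mutex mutex;
};
```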
@@ -5287,9 +5359,10 @@ private: num_parts = 0; } - std::shared_ptr storage; - BackupPtr backup; + const std::shared_ptr storage; + const BackupPtr backup; size_t num_parts = 0; + size_t num_broken_parts = 0; MutableDataPartsVector parts; std::map> temp_dirs; mutable std::mutex mutex; @@ -5305,8 +5378,9 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const Strings part_names = backup->listFiles(data_path_in_backup); boost::remove_erase(part_names, "mutations"); - auto restored_parts_holder - = std::make_shared(std::static_pointer_cast(shared_from_this()), backup, part_names.size()); + bool restore_broken_parts_as_detached = restorer.getRestoreSettings().restore_broken_parts_as_detached; + + auto restored_parts_holder = std::make_shared(std::static_pointer_cast(shared_from_this()), backup); fs::path data_path_in_backup_fs = data_path_in_backup; size_t num_parts = 0; @@ -5328,8 +5402,9 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const backup, part_path_in_backup = data_path_in_backup_fs / part_name, my_part_info = *part_info, + restore_broken_parts_as_detached, restored_parts_holder] - { storage->restorePartFromBackup(restored_parts_holder, my_part_info, part_path_in_backup); }); + { storage->restorePartFromBackup(restored_parts_holder, my_part_info, part_path_in_backup, restore_broken_parts_as_detached); }); ++num_parts; } @@ -5337,11 +5412,12 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const restored_parts_holder->setNumParts(num_parts); } -void MergeTreeData::restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup) const +void MergeTreeData::restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup, bool detach_if_broken) const { String part_name = part_info.getPartNameAndCheckFormat(format_version); auto backup = restored_parts_holder->getBackup(); + /// Calculate the total size of the part. UInt64 total_size_of_part = 0; Strings filenames = backup->listFiles(part_path_in_backup, /* recursive= */ true); fs::path part_path_in_backup_fs = part_path_in_backup; @@ -5349,21 +5425,22 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r total_size_of_part += backup->getFileSize(part_path_in_backup_fs / filename); std::shared_ptr reservation = getStoragePolicy()->reserveAndCheck(total_size_of_part); - auto disk = reservation->getDisk(); - fs::path temp_dir = restored_parts_holder->getTemporaryDirectory(disk); - fs::path temp_part_dir = temp_dir / part_path_in_backup_fs.relative_path(); - disk->createDirectories(temp_part_dir); - - /// For example: + /// Calculate paths, for example: /// part_name = 0_1_1_0 /// part_path_in_backup = /data/test/table/0_1_1_0 /// tmp_dir = tmp/1aaaaaa /// tmp_part_dir = tmp/1aaaaaa/data/test/table/0_1_1_0 + auto disk = reservation->getDisk(); + fs::path temp_dir = restored_parts_holder->getTemporaryDirectory(disk); + fs::path temp_part_dir = temp_dir / part_path_in_backup_fs.relative_path(); /// Subdirectories in the part's directory. It's used to restore projections. std::unordered_set subdirs; + /// Copy files from the backup to the directory `tmp_part_dir`. + disk->createDirectories(temp_part_dir); + for (const String & filename : filenames) { /// Needs to create subdirectories before copying the files. Subdirectories are used to represent projections. 
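Note: restorePartFromBackup above walks the file list of the part inside the backup, creates the needed subdirectories (used for projections) under a temporary directory, and copies the files there before the part is loaded. Roughly, the copy loop has this shape — a simplified sketch using plain std::filesystem instead of the IDisk/IBackup interfaces, with no reservation bookkeeping:

```
#include <filesystem>
#include <string>
#include <vector>

namespace fs = std::filesystem;

/// Simplified: copy a part's files from an unpacked backup directory into a
/// temporary part directory, creating subdirectories (e.g. projections) first.
void copyPartFromBackup(
    const fs::path & part_path_in_backup,
    const fs::path & temp_part_dir,
    const std::vector<std::string> & filenames)
{
    fs::create_directories(temp_part_dir);

    for (const auto & filename : filenames)
    {
        fs::path destination = temp_part_dir / filename;

        /// Subdirectories must exist before a file is copied into them.
        fs::create_directories(destination.parent_path());

        fs::copy_file(part_path_in_backup / filename, destination,
                      fs::copy_options::overwrite_existing);
    }
}
```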
@@ -5383,14 +5460,102 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r reservation->update(reservation->getSize() - file_size); } - auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); - MergeTreeDataPartBuilder builder(*this, part_name, single_disk_volume, temp_part_dir.parent_path(), part_name); - builder.withPartFormatFromDisk(); - auto part = std::move(builder).build(); - part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - part->loadColumnsChecksumsIndexes(false, true); + if (auto part = loadPartRestoredFromBackup(disk, temp_part_dir.parent_path(), part_name, detach_if_broken)) + restored_parts_holder->addPart(part); + else + restored_parts_holder->increaseNumBrokenParts(); +} - restored_parts_holder->addPart(part); +MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartRestoredFromBackup(const DiskPtr & disk, const String & temp_dir, const String & part_name, bool detach_if_broken) const +{ + MutableDataPartPtr part; + + auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + + /// Load this part from the directory `tmp_part_dir`. + auto load_part = [&] + { + MergeTreeDataPartBuilder builder(*this, part_name, single_disk_volume, temp_dir, part_name); + builder.withPartFormatFromDisk(); + part = std::move(builder).build(); + part->version.setCreationTID(Tx::PrehistoricTID, nullptr); + part->loadColumnsChecksumsIndexes(/* require_columns_checksums= */ false, /* check_consistency= */ true); + }; + + /// Broken parts can appear in a backup sometimes. + auto mark_broken = [&](const std::exception_ptr error) + { + tryLogException(error, log, + fmt::format("Part {} will be restored as detached because it's broken. You need to resolve this manually", part_name)); + if (!part) + { + /// Make a fake data part only to copy its files to /detached/. + part = MergeTreeDataPartBuilder{*this, part_name, single_disk_volume, temp_dir, part_name} + .withPartStorageType(MergeTreeDataPartStorageType::Full) + .withPartType(MergeTreeDataPartType::Wide) + .build(); + } + part->renameToDetached("broken-from-backup"); + }; + + /// Try to load this part multiple times. + auto backoff_ms = loading_parts_initial_backoff_ms; + for (size_t try_no = 0; try_no < loading_parts_max_tries; ++try_no) + { + std::exception_ptr error; + bool retryable = false; + try + { + load_part(); + } + catch (const Poco::Net::NetException &) + { + error = std::current_exception(); + retryable = true; + } + catch (const Poco::TimeoutException &) + { + error = std::current_exception(); + retryable = true; + } + catch (...) + { + error = std::current_exception(); + retryable = isRetryableException(std::current_exception()); + } + + if (!error) + return part; + + if (!retryable && detach_if_broken) + { + mark_broken(error); + return nullptr; + } + + if (!retryable) + { + LOG_ERROR(log, + "Failed to restore part {} because it's broken. You can skip broken parts while restoring by setting " + "'restore_broken_parts_as_detached = true'", + part_name); + } + + if (!retryable || (try_no + 1 == loading_parts_max_tries)) + { + if (Exception * e = exception_cast(error)) + e->addMessage("while restoring part {} of table {}", part->name, getStorageID()); + std::rethrow_exception(error); + } + + tryLogException(error, log, + fmt::format("Failed to load part {} at try {} with a retryable error. 
Will retry in {} ms", part_name, try_no, backoff_ms)); + + std::this_thread::sleep_for(std::chrono::milliseconds(backoff_ms)); + backoff_ms = std::min(backoff_ms * 2, loading_parts_max_backoff_ms); + } + + UNREACHABLE(); } @@ -5693,6 +5858,10 @@ bool MergeTreeData::supportsLightweightDelete() const auto lock = lockParts(); for (const auto & part : data_parts_by_info) { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + if (!part->supportLightweightDeleteMutate()) return false; } @@ -5787,18 +5956,21 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { const String source_dir = "detached/"; - std::map name_to_disk; - /// Let's compose a list of parts that should be added. if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - auto disk = getDiskForDetachedPart(part_id); - renamed_parts.addPart(part_id, "attaching_" + part_id, disk); - - if (MergeTreePartInfo::tryParsePartName(part_id, format_version)) - name_to_disk[part_id] = getDiskForDetachedPart(part_id); + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_id); + } + else + { + auto disk = getDiskForDetachedPart(part_id); + renamed_parts.addPart(part_id, "attaching_" + part_id, disk); + } } else { @@ -5815,6 +5987,12 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_info.dir_name)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_info.dir_name); + continue; + } LOG_DEBUG(log, "Found part {}", part_info.dir_name); active_parts.add(part_info.dir_name); } @@ -5825,6 +6003,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { const String containing_part = active_parts.getContainingPart(part_info.dir_name); + if (containing_part.empty()) + continue; LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name); @@ -6188,14 +6368,14 @@ void MergeTreeData::Transaction::clear() precommitted_parts.clear(); } -MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData::DataPartsLock * acquired_parts_lock) +MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(DataPartsLock * acquired_parts_lock) { DataPartsVector total_covered_parts; if (!isEmpty()) { auto settings = data.getSettings(); - auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts(); + auto parts_lock = acquired_parts_lock ? DataPartsLock() : data.lockParts(); auto * owing_parts_lock = acquired_parts_lock ? acquired_parts_lock : &parts_lock; for (const auto & part : precommitted_parts) @@ -7305,7 +7485,8 @@ std::pair MergeTreeData::cloneAn const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot, - const IDataPartStorage::ClonePartParams & params) + const IDataPartStorage::ClonePartParams & params, + const WriteSettings & write_settings) { /// Check that the storage policy contains the disk where the src_part is located. 
bool does_storage_policy_allow_same_disk = false; @@ -7362,7 +7543,8 @@ std::pair MergeTreeData::cloneAn auto dst_part_storage = src_part_storage->freeze( relative_data_path, tmp_dst_part_name, - /*save_metadata_callback=*/ {}, + write_settings, + /* save_metadata_callback= */ {}, params); if (params.metadata_version_to_write.has_value()) @@ -7619,11 +7801,13 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( auto new_storage = data_part_storage->freeze( backup_part_path, part->getDataPartStorage().getPartDirectory(), + local_context->getWriteSettings(), callback, params); part->is_frozen.store(true, std::memory_order_relaxed); result.push_back(PartitionCommandResultInfo{ + .command_type = "FREEZE PART", .partition_id = part->info.partition_id, .part_name = part->name, .backup_path = new_storage->getFullRootPath(), @@ -7633,7 +7817,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( ++parts_processed; } - LOG_DEBUG(log, "Freezed {} parts", parts_processed); + LOG_DEBUG(log, "Froze {} parts", parts_processed); return result; } @@ -7771,7 +7955,7 @@ try LOG_WARNING(log, "Profile counters are not set"); } - part_log->add(part_log_elem); + part_log->add(std::move(part_log_elem)); } catch (...) { @@ -7816,7 +8000,8 @@ bool MergeTreeData::scheduleDataMovingJob(BackgroundJobsAssignee & assignee) assignee.scheduleMoveTask(std::make_shared( [this, moving_tagger] () mutable { - return moveParts(moving_tagger) == MovePartsOutcome::PartsMoved; + WriteSettings write_settings = Context::getGlobalContextInstance()->getWriteSettings(); + return moveParts(moving_tagger, write_settings, /* wait_for_move_if_zero_copy= */ false) == MovePartsOutcome::PartsMoved; }, moves_assignee_trigger, getStorageID())); return true; } @@ -7831,7 +8016,7 @@ bool MergeTreeData::areBackgroundMovesNeeded() const return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1; } -MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) +MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space, const WriteSettings & write_settings) { if (parts_mover.moves_blocker.isCancelled()) return MovePartsOutcome::MovesAreCancelled; @@ -7840,7 +8025,7 @@ MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, if (moving_tagger->parts_to_move.empty()) return MovePartsOutcome::NothingToMove; - return moveParts(moving_tagger, true); + return moveParts(moving_tagger, write_settings, /* wait_for_move_if_zero_copy= */ true); } MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::selectPartsForMove() @@ -7895,7 +8080,7 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co return std::make_shared(std::move(parts_to_move), *this); } -MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, bool wait_for_move_if_zero_copy) +MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, const WriteSettings & write_settings, bool wait_for_move_if_zero_copy) { LOG_INFO(log, "Got {} parts to move.", moving_tagger->parts_to_move.size()); @@ -7956,7 +8141,7 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & { if (lock->isLocked()) { - cloned_part = parts_mover.clonePart(moving_part); + cloned_part = parts_mover.clonePart(moving_part, write_settings); parts_mover.swapClonedPart(cloned_part); break; } @@ -7983,7 +8168,7 @@ MovePartsOutcome 
MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & } else /// Ordinary move as it should be { - cloned_part = parts_mover.clonePart(moving_part); + cloned_part = parts_mover.clonePart(moving_part, write_settings); parts_mover.swapClonedPart(cloned_part); } write_part_log({}); @@ -8425,7 +8610,7 @@ void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) } } -MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( +std::pair MergeTreeData::createEmptyPart( MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, const String & new_part_name, const MergeTreeTransactionPtr & txn) { @@ -8444,6 +8629,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( ReservationPtr reservation = reserveSpacePreferringTTLRules(metadata_snapshot, 0, move_ttl_infos, time(nullptr), 0, true); VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); + auto tmp_dir_holder = getTemporaryPartDirectoryHolder(EMPTY_PART_TMP_PREFIX + new_part_name); auto new_data_part = getDataPartBuilder(new_part_name, data_part_volume, EMPTY_PART_TMP_PREFIX + new_part_name) .withBytesAndRowsOnDisk(0, 0) .withPartInfo(new_part_info) @@ -8503,12 +8689,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( out.finalizePart(new_data_part, sync_on_insert); new_data_part_storage->precommitTransaction(); - return new_data_part; + return std::make_pair(std::move(new_data_part), std::move(tmp_dir_holder)); } bool MergeTreeData::allowRemoveStaleMovingParts() const { - return ConfigHelper::getBool(getContext()->getConfigRef(), "allow_remove_stale_moving_parts"); + return ConfigHelper::getBool(getContext()->getConfigRef(), "allow_remove_stale_moving_parts", /* default_ = */ true); } CurrentlySubmergingEmergingTagger::~CurrentlySubmergingEmergingTagger() diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 41fc4657854..95d8e74f32c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -40,7 +41,6 @@ #include #include - namespace DB { @@ -63,6 +63,8 @@ using BackupEntries = std::vector; +struct WriteSettings; + /// Auxiliary struct holding information about the future merged or mutated part. struct EmergingPartInfo { @@ -84,6 +86,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +struct DataPartsLock +{ + std::optional wait_watch; + std::unique_lock lock; + std::optional lock_watch; + DataPartsLock() = default; + explicit DataPartsLock(std::mutex & data_parts_mutex_); + + ~DataPartsLock(); +}; /// Data structure for *MergeTree engines. /// Merge tree is used for incremental sorting of data. 
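Note: the new DataPartsLock above wraps the parts mutex so that both the time spent waiting to acquire it and the time spent holding it are reported as profile events (PartsLockWaitMicroseconds / PartsLockHoldMicroseconds). The same RAII idea reduced to standard-library pieces — a sketch where the report functions stand in for ProfileEvents::increment:

```
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <mutex>

/// Hypothetical reporting hooks standing in for ProfileEvents::increment.
void reportWaitMicroseconds(uint64_t us) { std::fprintf(stderr, "waited %llu us\n", static_cast<unsigned long long>(us)); }
void reportHoldMicroseconds(uint64_t us) { std::fprintf(stderr, "held %llu us\n", static_cast<unsigned long long>(us)); }

/// RAII lock that records how long we waited for the mutex and, on
/// destruction, how long we held it -- the idea behind DataPartsLock.
class TimedLock
{
public:
    explicit TimedLock(std::mutex & mutex)
    {
        auto wait_start = std::chrono::steady_clock::now();
        lock = std::unique_lock(mutex); /// may block here
        hold_start = std::chrono::steady_clock::now();
        reportWaitMicroseconds(elapsedMicroseconds(wait_start, hold_start));
    }

    ~TimedLock()
    {
        if (lock.owns_lock())
            reportHoldMicroseconds(elapsedMicroseconds(hold_start, std::chrono::steady_clock::now()));
    }

private:
    static uint64_t elapsedMicroseconds(
        std::chrono::steady_clock::time_point from,
        std::chrono::steady_clock::time_point to)
    {
        return std::chrono::duration_cast<std::chrono::microseconds>(to - from).count();
    }

    std::unique_lock<std::mutex> lock;
    std::chrono::steady_clock::time_point hold_start;
};
```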
@@ -220,7 +232,6 @@ public: using MutableDataParts = std::set; using DataPartsVector = std::vector; - using DataPartsLock = std::unique_lock; DataPartsLock lockParts() const { return DataPartsLock(data_parts_mutex); } using OperationDataPartsLock = std::unique_lock; @@ -240,7 +251,7 @@ public: public: Transaction(MergeTreeData & data_, MergeTreeTransaction * txn_); - DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); + DataPartsVector commit(DataPartsLock * acquired_parts_lock = nullptr); void addPart(MutableDataPartPtr & part); @@ -434,6 +445,8 @@ public: bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } + bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } + NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; @@ -830,9 +843,12 @@ public: MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; std::pair cloneAndLoadDataPartOnSameDisk( - const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, - const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot, - const IDataPartStorage::ClonePartParams & params); + const MergeTreeData::DataPartPtr & src_part, + const String & tmp_part_prefix, + const MergeTreePartInfo & dst_part_info, + const StorageMetadataPtr & metadata_snapshot, + const IDataPartStorage::ClonePartParams & params, + const WriteSettings & write_settings); virtual std::vector getMutationsStatus() const = 0; @@ -934,7 +950,9 @@ public: WriteAheadLogPtr getWriteAheadLog(); constexpr static auto EMPTY_PART_TMP_PREFIX = "tmp_empty_"; - MergeTreeData::MutableDataPartPtr createEmptyPart(MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, const String & new_part_name, const MergeTreeTransactionPtr & txn); + std::pair createEmptyPart( + MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, + const String & new_part_name, const MergeTreeTransactionPtr & txn); MergeTreeDataFormatVersion format_version; @@ -1165,7 +1183,6 @@ protected: /// And for ReplicatedMergeTree we don't have LogEntry type for this operation. BackgroundJobsAssignee background_operations_assignee; BackgroundJobsAssignee background_moves_assignee; - bool use_metadata_cache; /// Strongly connected with two fields above. /// Every task that is finished will ask to assign a new one into an executor. @@ -1323,7 +1340,7 @@ protected: /// MergeTree because they store mutations in different way. virtual std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; /// Moves part to specified space, used in ALTER ... MOVE ... queries - MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space); + MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space, const WriteSettings & write_settings); struct PartBackupEntries { @@ -1334,13 +1351,14 @@ protected: using PartsBackupEntries = std::vector; /// Makes backup entries to backup the parts of this table. 
- PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ContextPtr & local_context); + PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ReadSettings & read_settings, const ContextPtr & local_context); class RestoredPartsHolder; /// Restores the parts of this table from backup. void restorePartsFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions); - void restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup) const; + void restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup, bool detach_if_broken) const; + MutableDataPartPtr loadPartRestoredFromBackup(const DiskPtr & disk, const String & temp_dir, const String & part_name, bool detach_if_broken) const; /// Attaches restored parts to the storage. virtual void attachRestoredParts(MutableDataPartsVector && parts) = 0; @@ -1476,7 +1494,7 @@ private: using CurrentlyMovingPartsTaggerPtr = std::shared_ptr; /// Move selected parts to corresponding disks - MovePartsOutcome moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, bool wait_for_move_if_zero_copy=false); + MovePartsOutcome moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, const WriteSettings & write_settings, bool wait_for_move_if_zero_copy); /// Select parts for move and disks for them. Used in background moving processes. CurrentlyMovingPartsTaggerPtr selectPartsForMove(); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index e89cd8da232..b3a91add879 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -534,11 +534,22 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( String MergeTreeDataMergerMutator::getBestPartitionToOptimizeEntire( const PartitionsInfo & partitions_info) const { - const auto data_settings = data.getSettings(); + const auto & data_settings = data.getSettings(); if (!data_settings->min_age_to_force_merge_on_partition_only) return {}; if (!data_settings->min_age_to_force_merge_seconds) return {}; + size_t occupied = CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask].load(std::memory_order_relaxed); + size_t max_tasks_count = data.getContext()->getMergeMutateExecutor()->getMaxTasksCount(); + if (occupied > 1 && max_tasks_count - occupied < data_settings->number_of_free_entries_in_pool_to_execute_optimize_entire_partition) + { + LOG_INFO( + log, + "Not enough idle threads to execute optimizing entire partition. 
See settings " + "'number_of_free_entries_in_pool_to_execute_optimize_entire_partition' " + "and 'background_pool_size'"); + return {}; + } auto best_partition_it = std::max_element( partitions_info.begin(), diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 6628cd68eaf..b4d405312e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -187,15 +187,15 @@ bool MergeTreeDataPartChecksums::readV3(ReadBuffer & in) String name; Checksum sum; - readBinary(name, in); + readStringBinary(name, in); readVarUInt(sum.file_size, in); - readPODBinary(sum.file_hash, in); - readBinary(sum.is_compressed, in); + readBinaryLittleEndian(sum.file_hash, in); + readBinaryLittleEndian(sum.is_compressed, in); if (sum.is_compressed) { readVarUInt(sum.uncompressed_size, in); - readPODBinary(sum.uncompressed_hash, in); + readBinaryLittleEndian(sum.uncompressed_hash, in); } files.emplace(std::move(name), sum); @@ -223,15 +223,15 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const const String & name = it.first; const Checksum & sum = it.second; - writeBinary(name, out); + writeStringBinary(name, out); writeVarUInt(sum.file_size, out); - writePODBinary(sum.file_hash, out); - writeBinary(sum.is_compressed, out); + writeBinaryLittleEndian(sum.file_hash, out); + writeBinaryLittleEndian(sum.is_compressed, out); if (sum.is_compressed) { writeVarUInt(sum.uncompressed_size, out); - writePODBinary(sum.uncompressed_hash, out); + writeBinaryLittleEndian(sum.uncompressed_hash, out); } } } @@ -323,9 +323,7 @@ MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTot hash_of_all_files.update(checksum.file_hash); } - MergeTreeDataPartChecksums::Checksum::uint128 ret; - hash_of_all_files.get128(reinterpret_cast(&ret)); - return ret; + return getSipHash128AsPair(hash_of_all_files); } void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const @@ -339,9 +337,9 @@ void MinimalisticDataPartChecksums::serializeWithoutHeader(WriteBuffer & to) con writeVarUInt(num_compressed_files, to); writeVarUInt(num_uncompressed_files, to); - writePODBinary(hash_of_all_files, to); - writePODBinary(hash_of_uncompressed_files, to); - writePODBinary(uncompressed_hash_of_compressed_files, to); + writeBinaryLittleEndian(hash_of_all_files, to); + writeBinaryLittleEndian(hash_of_uncompressed_files, to); + writeBinaryLittleEndian(uncompressed_hash_of_compressed_files, to); } String MinimalisticDataPartChecksums::getSerializedString() const @@ -382,9 +380,9 @@ void MinimalisticDataPartChecksums::deserializeWithoutHeader(ReadBuffer & in) readVarUInt(num_compressed_files, in); readVarUInt(num_uncompressed_files, in); - readPODBinary(hash_of_all_files, in); - readPODBinary(hash_of_uncompressed_files, in); - readPODBinary(uncompressed_hash_of_compressed_files, in); + readBinaryLittleEndian(hash_of_all_files, in); + readBinaryLittleEndian(hash_of_uncompressed_files, in); + readBinaryLittleEndian(uncompressed_hash_of_compressed_files, in); } void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums_) @@ -415,14 +413,9 @@ void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPar } } - auto get_hash = [] (SipHash & hash, uint128 & data) - { - hash.get128(data); - }; - - get_hash(hash_of_all_files_state, hash_of_all_files); - get_hash(hash_of_uncompressed_files_state, hash_of_uncompressed_files); - 
get_hash(uncompressed_hash_of_compressed_files_state, uncompressed_hash_of_compressed_files); + hash_of_all_files = getSipHash128AsPair(hash_of_all_files_state); + hash_of_uncompressed_files = getSipHash128AsPair(hash_of_uncompressed_files_state); + uncompressed_hash_of_compressed_files = getSipHash128AsPair(uncompressed_hash_of_compressed_files_state); } String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 9c47608e364..c6d059498ff 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -30,7 +30,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -43,7 +43,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; return std::make_unique( - read_info, columns_to_read, metadata_snapshot, uncompressed_cache, + read_info, columns_to_read, storage_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, load_marks_threadpool, avg_value_size_hints, profile_callback); } @@ -115,7 +115,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( { marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile)); size_t granularity; - readIntBinary(granularity, *marks_reader); + readBinaryLittleEndian(granularity, *marks_reader); index_granularity_.appendMark(granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 08764eedb43..2bbac766c8e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -30,7 +30,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 468747a6c36..f04e08838a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int DIRECTORY_ALREADY_EXISTS; + extern const int NOT_IMPLEMENTED; } MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( @@ -32,7 +33,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * /* uncompressed_cache */, MarkCache * /* mark_cache */, @@ -45,7 +46,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - 
read_info, ptr, columns_to_read, metadata_snapshot, mark_ranges, reader_settings); + read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( @@ -138,8 +139,12 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, + const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const { + if (disk_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions"); String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); return flushToDisk(detached_path, metadata_snapshot); } @@ -167,7 +172,7 @@ IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() column.column->updateHashFast(hash); checksum.uncompressed_size = block.bytes(); - hash.get128(checksum.uncompressed_hash); + checksum.uncompressed_hash = getSipHash128AsPair(hash); return checksum; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 2698b69b38e..95f7b796f9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -19,7 +19,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -42,7 +42,8 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const override; std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 2d886e2058b..20600909ce4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -29,7 +29,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -41,7 +41,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( auto read_info = std::make_shared(shared_from_this(), alter_conversions); return std::make_unique( read_info, columns_to_read, 
- metadata_snapshot, uncompressed_cache, + storage_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, avg_value_size_hints, profile_callback); } @@ -130,13 +130,13 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl( MarkInCompressedFile mark; size_t granularity; - readBinary(mark.offset_in_compressed_file, *marks_reader); - readBinary(mark.offset_in_decompressed_block, *marks_reader); + readBinaryLittleEndian(mark.offset_in_compressed_file, *marks_reader); + readBinaryLittleEndian(mark.offset_in_decompressed_block, *marks_reader); ++marks_count; if (index_granularity_info_.mark_type.adaptive) { - readIntBinary(granularity, *marks_reader); + readBinaryLittleEndian(granularity, *marks_reader); index_granularity_.appendMark(granularity); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 0b2ffeb4b18..2076a1ec028 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -25,7 +25,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 5e1da21da5b..3475130bf24 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -228,8 +228,8 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G }; - writeIntBinary(plain_hashing.count(), marks_out); - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(plain_hashing.count(), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), @@ -239,7 +239,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G prev_stream->hashing_buf.next(); } - writeIntBinary(granule.rows_to_write, marks_out); + writeBinaryLittleEndian(granule.rows_to_write, marks_out); } } @@ -270,10 +270,10 @@ void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Check { for (size_t i = 0; i < columns_list.size(); ++i) { - writeIntBinary(plain_hashing.count(), marks_out); - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(plain_hashing.count(), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); } - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); } for (const auto & [_, stream] : streams_by_codec) @@ -365,8 +365,9 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & { uncompressed_size += stream->hashing_buf.count(); auto stream_hash = stream->hashing_buf.getHash(); + transformEndianness(stream_hash); uncompressed_hash = CityHash_v1_0_2::CityHash128WithSeed( - reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); + reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); } checksums.files[data_file_name].is_compressed = true; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index f57ffa5ee14..79b72d4ae39 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ 
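Note: many of the hunks above replace raw readBinary/readIntBinary/writePODBinary calls with their *LittleEndian counterparts, so that marks and checksums keep a fixed little-endian on-disk layout even when the host is big-endian. The underlying trick is just an explicit byte-order conversion around the raw read or write, roughly like this standalone sketch (not ClickHouse's ReadHelpers/WriteHelpers):

```
#include <bit>
#include <cstdint>
#include <istream>
#include <ostream>

/// Byte-swap a 64-bit value (GCC/Clang builtin; std::byteswap in C++23).
inline uint64_t byteswap64(uint64_t x) { return __builtin_bswap64(x); }

/// Write `value` in little-endian order regardless of host endianness.
inline void writeBinaryLittleEndian64(uint64_t value, std::ostream & out)
{
    if constexpr (std::endian::native == std::endian::big)
        value = byteswap64(value);
    out.write(reinterpret_cast<const char *>(&value), sizeof(value));
}

/// Read a 64-bit little-endian value back into host byte order.
inline uint64_t readBinaryLittleEndian64(std::istream & in)
{
    uint64_t value = 0;
    in.read(reinterpret_cast<char *>(&value), sizeof(value));
    if constexpr (std::endian::native == std::endian::big)
        value = byteswap64(value);
    return value;
}
```

On little-endian machines the conversion compiles away; on big-endian ones it keeps the file format byte-for-byte identical, which is what makes the stored marks and checksum files portable.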
b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -313,13 +313,13 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block if (stream.compressed_hashing.offset() >= settings.min_compress_block_size) stream.compressed_hashing.next(); - writeIntBinary(stream.plain_hashing.count(), marks_out); - writeIntBinary(stream.compressed_hashing.offset(), marks_out); + writeBinaryLittleEndian(stream.plain_hashing.count(), marks_out); + writeBinaryLittleEndian(stream.compressed_hashing.offset(), marks_out); /// Actually this numbers is redundant, but we have to store them /// to be compatible with the normal .mrk2 file format if (settings.can_use_adaptive_granularity) - writeIntBinary(1UL, marks_out); + writeBinaryLittleEndian(1UL, marks_out); } size_t pos = granule.start_row; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index f9fe6f2c8ab..bcf340e0f55 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -275,10 +275,10 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre Stream & stream = *column_streams[stream_with_mark.stream_name]; WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing; - writeIntBinary(stream_with_mark.mark.offset_in_compressed_file, marks_out); - writeIntBinary(stream_with_mark.mark.offset_in_decompressed_block, marks_out); + writeBinaryLittleEndian(stream_with_mark.mark.offset_in_compressed_file, marks_out); + writeBinaryLittleEndian(stream_with_mark.mark.offset_in_decompressed_block, marks_out); if (settings.can_use_adaptive_granularity) - writeIntBinary(rows_in_mark, marks_out); + writeBinaryLittleEndian(rows_in_mark, marks_out); } StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( @@ -452,10 +452,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); - DB::readBinary(offset_in_compressed_file, *mrk_in); - DB::readBinary(offset_in_decompressed_block, *mrk_in); + readBinaryLittleEndian(offset_in_compressed_file, *mrk_in); + readBinaryLittleEndian(offset_in_decompressed_block, *mrk_in); if (settings.can_use_adaptive_granularity) - DB::readBinary(index_granularity_rows, *mrk_in); + readBinaryLittleEndian(index_granularity_rows, *mrk_in); else index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ee515106591..31aa2dbb61f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -772,6 +773,37 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( return sampling; } +std::optional> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns( + const MergeTreeData & data, + const MergeTreeData::DataPartsVector & parts, + const ActionsDAGPtr & filter_dag, + ContextPtr context) +{ + if (!filter_dag) + return {}; + + auto sample = data.getSampleBlockWithVirtualColumns(); + std::unordered_set allowed_inputs; + for (const auto * input : filter_dag->getInputs()) + if (sample.has(input->result_name)) + allowed_inputs.insert(input); + + if 
(allowed_inputs.empty()) + return {}; + + auto atoms = filter_dag->extractConjunctionAtoms(filter_dag->getOutputs().at(0)); + atoms = ActionsDAG::filterNodesByAllowedInputs(std::move(atoms), allowed_inputs); + if (atoms.empty()) + return {}; + + auto dag = ActionsDAG::buildFilterActionsDAG(atoms, {}, context); + + auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */); + VirtualColumnUtils::filterBlockWithQuery(dag, virtual_columns_block, context); + return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); +} + + std::optional> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns( const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, @@ -981,9 +1013,9 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd const auto & index_and_condition = skip_indexes.useful_indices[idx]; auto & stat = useful_indices_stat[idx]; stat.total_parts.fetch_add(1, std::memory_order_relaxed); - stat.total_granules.fetch_add(ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed); + size_t total_granules = ranges.ranges.getNumberOfMarks(); + stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed); - size_t granules_dropped = 0; ranges.ranges = filterMarksUsingIndex( index_and_condition.index, index_and_condition.condition, @@ -991,12 +1023,11 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd ranges.ranges, settings, reader_settings, - granules_dropped, mark_cache.get(), uncompressed_cache.get(), log); - stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); + stat.granules_dropped.fetch_add(total_granules - ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed); if (ranges.ranges.empty()) stat.parts_dropped.fetch_add(1, std::memory_order_relaxed); } @@ -1010,17 +1041,15 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd auto & stat = merged_indices_stat[idx]; stat.total_parts.fetch_add(1, std::memory_order_relaxed); - size_t total_granules = 0; - size_t granules_dropped = 0; + size_t total_granules = ranges.ranges.getNumberOfMarks(); ranges.ranges = filterMarksUsingMergedIndex( indices_and_condition.indices, indices_and_condition.condition, part, ranges.ranges, settings, reader_settings, - total_granules, granules_dropped, mark_cache.get(), uncompressed_cache.get(), log); stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed); - stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); + stat.granules_dropped.fetch_add(total_granules - ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed); if (ranges.ranges.empty()) stat.parts_dropped.fetch_add(1, std::memory_order_relaxed); @@ -1031,6 +1060,10 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd }; size_t num_threads = std::min(num_streams, parts.size()); + if (settings.max_threads_for_indexes) + { + num_threads = std::min(num_streams, settings.max_threads_for_indexes); + } if (num_threads <= 1) { @@ -1574,7 +1607,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( const MarkRanges & ranges, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - size_t & granules_dropped, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, Poco::Logger * log) @@ -1638,14 +1670,12 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( { if (index_mark != index_range.begin || !granule || last_index_mark != 
index_range.begin) granule = reader.read(); - // Cast to Ann condition + auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) { // vector of indexes of useful ranges auto result = ann_condition->getUsefulRanges(granule); - if (result.empty()) - ++granules_dropped; for (auto range : result) { @@ -1670,10 +1700,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( result = cache_in_store.store ? gin_filter_condition->mayBeTrueOnGranuleInPart(granule, cache_in_store) : true; if (!result) - { - ++granules_dropped; continue; - } MarkRange data_range( std::max(ranges[i].begin, index_mark * index_granularity), @@ -1698,8 +1725,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( const MarkRanges & ranges, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - size_t & total_granules, - size_t & granules_dropped, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, Poco::Logger * log) @@ -1756,8 +1781,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( for (auto & reader : readers) reader->seek(index_range.begin); - total_granules += index_range.end - index_range.begin; - for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) { if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin) @@ -1770,10 +1793,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( } if (!condition->mayBeTrueOnGranule(granules)) - { - ++granules_dropped; continue; - } MarkRange data_range( std::max(range.begin, index_mark * index_granularity), diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index a5dcbfe6650..d5d8107db48 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -93,7 +93,6 @@ private: const MarkRanges & ranges, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - size_t & granules_dropped, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, Poco::Logger * log); @@ -105,8 +104,6 @@ private: const MarkRanges & ranges, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - size_t & total_granules, - size_t & granules_dropped, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, Poco::Logger * log); @@ -173,6 +170,12 @@ public: const ASTPtr & query, ContextPtr context); + static std::optional> filterPartsByVirtualColumns( + const MergeTreeData & data, + const MergeTreeData::DataPartsVector & parts, + const ActionsDAGPtr & filter_dag, + ContextPtr context); + /// Filter parts using minmax index and partition key. 
static void filterPartsByPartition( std::optional & partition_pruner, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index ea5d64212f5..d6ba7cb3093 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -39,6 +39,7 @@ namespace ProfileEvents extern const Event MergeTreeDataProjectionWriterRows; extern const Event MergeTreeDataProjectionWriterUncompressedBytes; extern const Event MergeTreeDataProjectionWriterCompressedBytes; + extern const Event RejectedInserts; } namespace DB @@ -58,7 +59,8 @@ void buildScatterSelector( const ColumnRawPtrs & columns, PODArray & partition_num_to_first_row, IColumn::Selector & selector, - size_t max_parts) + size_t max_parts, + ContextPtr context) { /// Use generic hashed variant since partitioning is unlikely to be a bottleneck. using Data = HashMap; @@ -66,6 +68,8 @@ void buildScatterSelector( size_t num_rows = columns[0]->size(); size_t partitions_count = 0; + size_t throw_on_limit = context->getSettingsRef().throw_on_max_partitions_per_insert_block; + for (size_t i = 0; i < num_rows; ++i) { Data::key_type key = hash128(i, columns.size(), columns); @@ -75,7 +79,9 @@ void buildScatterSelector( if (inserted) { - if (max_parts && partitions_count >= max_parts) + if (max_parts && partitions_count >= max_parts && throw_on_limit) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception(ErrorCodes::TOO_MANY_PARTS, "Too many partitions for single INSERT block (more than {}). " "The limit is controlled by 'max_partitions_per_insert_block' setting. " @@ -85,6 +91,7 @@ void buildScatterSelector( "for a table is under 1000..10000. Please note, that partitioning is not intended " "to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). " "Partitions are intended for data manipulation (DROP PARTITION, etc).", max_parts); + } partition_num_to_first_row.push_back(i); it->getMapped() = partitions_count; @@ -102,6 +109,18 @@ void buildScatterSelector( if (partitions_count > 1) selector[i] = it->getMapped(); } + // Checking partitions per insert block again here outside the loop above + // so we can log the total number of partitions that would have parts created + if (max_parts && partitions_count >= max_parts && !throw_on_limit) + { + const auto & client_info = context->getClientInfo(); + Poco::Logger * log = &Poco::Logger::get("MergeTreeDataWriter"); + + LOG_WARNING(log, "INSERT query from initial_user {} (query ID: {}) inserted a block " + "that created parts in {} partitions. 
This is being logged " + "rather than throwing an exception as throw_on_max_partitions_per_insert_block=false.", + client_info.initial_user, client_info.initial_query_id, partitions_count); + } } /// Computes ttls and updates ttl infos @@ -240,7 +259,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( PODArray partition_num_to_first_row; IColumn::Selector selector; - buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts); + buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts, context); auto async_insert_info_with_partition = scatterAsyncInsertInfoBySelector(async_insert_info, selector, partition_num_to_first_row.size()); diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 09a04f13fc7..548b61ce422 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,9 +10,16 @@ #include #include +#include + namespace DB { +namespace ErrorCodes +{ + extern const int ABORTED; +} + namespace { @@ -231,6 +238,11 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: return std::make_pair(info, false); } + if (stopped) + { + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we add this part."); + } + chassert(current_writer != nullptr); /// Create new record @@ -261,6 +273,11 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf if (deduplication_window == 0) return; + if (stopped) + { + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); + } + chassert(current_writer != nullptr); for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */) diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp deleted file mode 100644 index 2676adfac2d..00000000000 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include "Storages/MergeTree/RangesInDataPart.h" -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int MEMORY_LIMIT_EXCEEDED; -} - -bool MergeTreeInOrderSelectAlgorithm::getNewTaskImpl() -try -{ - if (all_mark_ranges.empty()) - return false; - - if (!reader) - initializeReaders(); - - MarkRanges mark_ranges_for_task; - - if (!pool) - { - /// If we need to read few rows, set one range per task to reduce number of read data. - if (has_limit_below_one_block) - { - mark_ranges_for_task = MarkRanges{}; - mark_ranges_for_task.emplace_front(std::move(all_mark_ranges.front())); - all_mark_ranges.pop_front(); - } - else - { - mark_ranges_for_task = std::move(all_mark_ranges); - all_mark_ranges.clear(); - } - } - else - { - auto description = RangesInDataPartDescription{ - .info = data_part->info, - /// We just ignore all the distribution done before - /// Everything will be done on coordinator side - .ranges = {}, - }; - - mark_ranges_for_task = pool->getNewTask(description); - - if (mark_ranges_for_task.empty()) - return false; - } - - auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr - : getSizePredictor(data_part, task_columns, sample_block); - - task = std::make_unique( - data_part, - alter_conversions, - mark_ranges_for_task, - part_index_in_query, - column_name_set, - task_columns, - std::move(size_predictor)); - - return true; -} -catch (...) 
-{ - /// Suspicion of the broken part. A part is added to the queue for verification. - if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) - storage.reportBrokenPart(data_part); - throw; -} - -} diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h deleted file mode 100644 index da332c64232..00000000000 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - - -/// Used to read data from single part with select query in order of primary key. -/// Cares about PREWHERE, virtual columns, indexes etc. -/// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. -class MergeTreeInOrderSelectAlgorithm final : public MergeTreeSelectAlgorithm -{ -public: - template - explicit MergeTreeInOrderSelectAlgorithm(Args &&... args) - : MergeTreeSelectAlgorithm{std::forward(args)...} - { - LOG_TRACE(log, "Reading {} ranges in order from part {}, approx. {} rows starting from {}", - all_mark_ranges.size(), data_part->name, total_rows, - data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin)); - } - - String getName() const override { return "MergeTreeInOrder"; } - -private: - bool getNewTaskImpl() override; - void finalizeNewTask() override {} - - Poco::Logger * log = &Poco::Logger::get("MergeTreeInOrderSelectProcessor"); -}; - -} diff --git a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp index ef98accfbc6..c69c54f1c0d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes MergeTreeIndexAggregatorBloomFilter::MergeTreeIndexAggregatorBloomFilter( size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_) - : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_), column_hashes(columns_name_.size()) { assert(bits_per_row != 0); assert(hash_functions != 0); @@ -32,9 +32,9 @@ bool MergeTreeIndexAggregatorBloomFilter::empty() const MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset() { - const auto granule = std::make_shared(bits_per_row, hash_functions, total_rows, granule_index_blocks); + const auto granule = std::make_shared(bits_per_row, hash_functions, column_hashes); total_rows = 0; - granule_index_blocks.clear(); + column_hashes.clear(); return granule; } @@ -47,17 +47,19 @@ void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * p Block granule_index_block; size_t max_read_rows = std::min(block.rows() - *pos, limit); - for (const auto & index_column_name : index_columns_name) + for (size_t column = 0; column < index_columns_name.size(); ++column) { - const auto & column_and_type = block.getByName(index_column_name); + const auto & column_and_type = block.getByName(index_columns_name[column]); auto index_column = BloomFilterHash::hashWithColumn(column_and_type.type, column_and_type.column, *pos, max_read_rows); - granule_index_block.insert({index_column, std::make_shared(), column_and_type.name}); + const auto & index_col = checkAndGetColumn(index_column.get()); + const auto & index_data = index_col->getData(); + for (const auto & hash: index_data) + 
column_hashes[column].insert(hash); } *pos += max_read_rows; total_rows += max_read_rows; - granule_index_blocks.push_back(granule_index_block); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h index 9877db8ee30..d20653b7689 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -22,8 +23,8 @@ private: size_t hash_functions; const Names index_columns_name; + std::vector> column_hashes; size_t total_rows = 0; - Blocks granule_index_blocks; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index f77cfe4fed0..f6e3f310ad9 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -25,7 +25,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - template AnnoyIndexWithSerialization::AnnoyIndexWithSerialization(size_t dimensions) : Base::AnnoyIndex(dimensions) @@ -167,7 +166,8 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t if (offsets[i + 1] - offsets[i] != size) throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); - index = std::make_shared>(size); + if (!index) + index = std::make_shared>(size); /// Add all rows of block index->add_item(index->get_n_items(), array.data()); @@ -190,7 +190,8 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t if (data.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Tuple has 0 rows, {} rows expected", rows_read); - index = std::make_shared>(data[0].size()); + if (!index) + index = std::make_shared>(data[0].size()); for (const auto & item : data) index->add_item(index->get_n_items(), item.data()); @@ -224,9 +225,9 @@ bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const { - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return getUsefulRangesImpl(idx_granule); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return getUsefulRangesImpl(idx_granule); std::unreachable(); } @@ -289,9 +290,9 @@ MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, UInt64 MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const { - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return std::make_shared>(index.name, index.sample_block); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return std::make_shared>(index.name, index.sample_block); std::unreachable(); } @@ -299,9 +300,9 @@ MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const MergeTreeIndexAggregatorPtr MergeTreeIndexAnnoy::createIndexAggregator() const { /// TODO: Support more metrics. 
Available metrics: https://github.com/spotify/annoy/blob/master/src/annoymodule.cc#L151-L171 - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return std::make_shared>(index.name, index.sample_block, trees); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return std::make_shared>(index.name, index.sample_block, trees); std::unreachable(); } @@ -313,14 +314,13 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) { - static constexpr auto default_trees = 100uz; - static constexpr auto default_distance_function = "L2Distance"; - - String distance_function = default_distance_function; + static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2; + String distance_function = DEFAULT_DISTANCE_FUNCTION; if (!index.arguments.empty()) distance_function = index.arguments[0].get(); - UInt64 trees = default_trees; + static constexpr auto DEFAULT_TREES = 100uz; + UInt64 trees = DEFAULT_TREES; if (index.arguments.size() > 1) trees = index.arguments[1].get(); @@ -350,8 +350,8 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { String distance_name = index.arguments[0].get(); - if (distance_name != "L2Distance" && distance_name != "cosineDistance") - throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions 'L2Distance' and 'cosineDistance'"); + if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) + throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } /// Check data type of indexed column: diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index cfc3b7519b8..d2765f10a22 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -25,6 +25,7 @@ public: template using AnnoyIndexWithSerializationPtr = std::shared_ptr>; + template struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule { @@ -43,6 +44,7 @@ struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule AnnoyIndexWithSerializationPtr index; }; + template struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator { @@ -104,7 +106,6 @@ private: const String distance_function; }; - } #endif diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index b6a2cafe245..754340352dc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -737,7 +737,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) data_type = WhichDataType(low_cardinality.getDictionaryType()); } - if (!data_type.isString() && !data_type.isFixedString()) + if (!data_type.isString() && !data_type.isFixedString() && !data_type.isIPv6()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Ngram and token bloom filter indexes can only be used with column types `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)`, `Array(String)` or `Array(FixedString)`"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 267708b5312..7db3aa3a6b1 100644 --- 
a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -8,7 +8,6 @@ #include #include - namespace DB { namespace ErrorCodes @@ -16,21 +15,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) -{ - Block prev_block; - for (size_t index = 0; index < granule_index_blocks.size(); ++index) - { - const Block & granule_index_block = granule_index_blocks[index]; - - if (index != 0) - assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); - - prev_block = granule_index_block; - } -} - - MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_) : bits_per_row(bits_per_row_), hash_functions(hash_functions_) { @@ -39,42 +23,28 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_p } MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( - size_t bits_per_row_, size_t hash_functions_, size_t total_rows_, const Blocks & granule_index_blocks_) - : total_rows(total_rows_), bits_per_row(bits_per_row_), hash_functions(hash_functions_) + size_t bits_per_row_, size_t hash_functions_, const std::vector>& column_hashes_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), bloom_filters(column_hashes_.size()) { - if (granule_index_blocks_.empty() || !total_rows) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_blocks empty or total_rows is zero."); + if (column_hashes_.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule_index_blocks empty or total_rows is zero."); - assertGranuleBlocksStructure(granule_index_blocks_); + size_t bloom_filter_max_size = 0; + for (const auto & column_hash : column_hashes_) + bloom_filter_max_size = std::max(bloom_filter_max_size, column_hash.size()); - for (size_t index = 0; index < granule_index_blocks_.size(); ++index) + static size_t atom_size = 8; + + // If multiple columns are given, we will initialize all the bloom filters + // with the size of the highest-cardinality one. 
This is done for compatibility with + // existing binary serialization format + total_rows = bloom_filter_max_size; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + + for (size_t column = 0, columns = column_hashes_.size(); column < columns; ++column) { - Block granule_index_block = granule_index_blocks_[index]; - - if (unlikely(!granule_index_block || !granule_index_block.rows())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_block is empty."); - - if (index == 0) - { - static size_t atom_size = 8; - - for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) - { - size_t total_items = total_rows; - - if (const auto * array_col = typeid_cast(granule_index_block.getByPosition(column).column.get())) - { - const IColumn * nested_col = array_col->getDataPtr().get(); - total_items = nested_col->size(); - } - - size_t bytes_size = (bits_per_row * total_items + atom_size - 1) / atom_size; - bloom_filters.emplace_back(std::make_shared(bytes_size, hash_functions, 0)); - } - } - - for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) - fillingBloomFilter(bloom_filters[column], granule_index_block, column); + bloom_filters[column] = std::make_shared(bytes_size, hash_functions, 0); + fillingBloomFilter(bloom_filters[column], column_hashes_[column]); } } @@ -123,18 +93,11 @@ void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const } } -void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) const +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const HashSet &hashes) const { - const auto & column = granule_index_block.getByPosition(index_hash_column); - - if (const auto * hash_column = typeid_cast(column.column.get())) - { - const auto & hash_column_vec = hash_column->getData(); - - for (const auto & bf_base_hash : hash_column_vec) - for (size_t i = 0; i < hash_functions; ++i) - bf->addHashWithSeed(bf_base_hash, BloomFilterHash::bf_hash_seed[i]); - } + for (const auto & bf_base_hash : hashes) + for (size_t i = 0; i < hash_functions; ++i) + bf->addHashWithSeed(bf_base_hash.getKey(), BloomFilterHash::bf_hash_seed[i]); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h index 82bd91138a7..35335f5d0d2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -11,7 +12,7 @@ class MergeTreeIndexGranuleBloomFilter final : public IMergeTreeIndexGranule public: MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_); - MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t total_rows_, const Blocks & granule_index_blocks_); + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, const std::vector> & column_hashes); bool empty() const override; @@ -21,12 +22,12 @@ public: const std::vector & getFilters() const { return bloom_filters; } private: - size_t total_rows; + size_t total_rows = 0; size_t bits_per_row; size_t hash_functions; std::vector bloom_filters; - void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) const; + void fillingBloomFilter(BloomFilterPtr & bf, const 
HashSet & hashes) const; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 6b4919c545d..325df6ffb6f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -491,6 +491,10 @@ bool MergeTreeConditionInverted::traverseASTEquals( DataTypePtr const_type; if (argument.tryGetConstant(const_value, const_type)) { + auto const_data_type = WhichDataType(const_type); + if (!const_data_type.isStringOrFixedString() && !const_data_type.isArray()) + return false; + key_column_num = header.getPositionByName(map_keys_index_column_name); key_exists = true; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 9c34a149128..9868d4f37f3 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -256,10 +256,6 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!key_columns.contains(name)) key_columns.insert(name); - ASTPtr ast_filter_node = buildFilterNode(query_info.query); - if (!ast_filter_node) - return; - if (context->getSettingsRef().allow_experimental_analyzer) { if (!query_info.filter_actions_dag) @@ -280,6 +276,10 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( } else { + ASTPtr ast_filter_node = buildFilterNode(query_info.query); + if (!ast_filter_node) + return; + if (checkASTUseless(ast_filter_node)) return; @@ -457,8 +457,10 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio if (arguments_size != 1) return nullptr; + const ActionsDAG::Node * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node); + auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context); - return &result_dag->addFunction(bit_swap_last_two_function, {arguments[0]}, {}); + return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {}); } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { @@ -612,6 +614,9 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) } else if (func->name == "and" || func->name == "indexHint") { + if (args.size() < 2) + return false; + auto last_arg = args.back(); args.pop_back(); @@ -631,6 +636,9 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) } else if (func->name == "or") { + if (args.size() < 2) + return false; + auto last_arg = args.back(); args.pop_back(); diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp new file mode 100644 index 00000000000..70e2b8f76df --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -0,0 +1,375 @@ +#ifdef ENABLE_USEARCH + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpass-failed" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int ILLEGAL_COLUMN; + extern const int INCORRECT_DATA; + extern const int INCORRECT_NUMBER_OF_COLUMNS; + extern const int INCORRECT_QUERY; + extern const int LOGICAL_ERROR; +} + +template +USearchIndexWithSerialization::USearchIndexWithSerialization(size_t dimensions) + : Base(Base::make(unum::usearch::metric_punned_t(dimensions, Metric))) +{ +} + +template +void USearchIndexWithSerialization::serialize(WriteBuffer & ostr) const +{ + auto callback 
= [&ostr](void * from, size_t n) + { + ostr.write(reinterpret_cast(from), n); + return true; + }; + + Base::save_to_stream(callback); +} + +template +void USearchIndexWithSerialization::deserialize(ReadBuffer & istr) +{ + auto callback = [&istr](void * from, size_t n) + { + istr.readStrict(reinterpret_cast(from), n); + return true; + }; + + Base::load_from_stream(callback); +} + +template +size_t USearchIndexWithSerialization::getDimensions() const +{ + return Base::dimensions(); +} + +template +MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( + const String & index_name_, + const Block & index_sample_block_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) + , index(nullptr) +{ +} + +template +MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( + const String & index_name_, + const Block & index_sample_block_, + USearchIndexWithSerializationPtr index_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) + , index(std::move(index_)) +{ +} + +template +void MergeTreeIndexGranuleUSearch::serializeBinary(WriteBuffer & ostr) const +{ + /// Number of dimensions is required in the index constructor, + /// so it must be written and read separately from the other part + writeIntBinary(static_cast(index->getDimensions()), ostr); // write dimension + index->serialize(ostr); +} + +template +void MergeTreeIndexGranuleUSearch::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) +{ + UInt64 dimension; + readIntBinary(dimension, istr); + index = std::make_shared>(dimension); + index->deserialize(istr); +} + +template +MergeTreeIndexAggregatorUSearch::MergeTreeIndexAggregatorUSearch( + const String & index_name_, + const Block & index_sample_block_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) +{ +} + +template +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorUSearch::getGranuleAndReset() +{ + auto granule = std::make_shared>(index_name, index_sample_block, index); + index = nullptr; + return granule; +} + +template +void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, size_t limit) +{ + if (*pos >= block.rows()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "The provided position is not less than the number of block rows. 
Position: {}, Block rows: {}.", + *pos, + block.rows()); + + size_t rows_read = std::min(limit, block.rows() - *pos); + if (rows_read == 0) + return; + + if (index_sample_block.columns() > 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column"); + + const String & index_column_name = index_sample_block.getByPosition(0).name; + ColumnPtr column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); + + if (const auto & column_array = typeid_cast(column_cut.get())) + { + const auto & data = column_array->getData(); + const auto & array = typeid_cast(data).getData(); + + if (array.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); + + const auto & offsets = column_array->getOffsets(); + const size_t num_rows = offsets.size(); + + + /// Check all sizes are the same + size_t size = offsets[0]; + for (size_t i = 0; i < num_rows - 1; ++i) + if (offsets[i + 1] - offsets[i] != size) + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + + if (!index) + index = std::make_shared>(size); + + /// Add all rows of block + if (!index->reserve(unum::usearch::ceil2(index->size() + num_rows))) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); + + if (auto rc = index->add(index->size(), array.data()); !rc) + throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + for (size_t current_row = 1; current_row < num_rows; ++current_row) + if (auto rc = index->add(index->size(), &array[offsets[current_row - 1]]); !rc) + throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + + } + else if (const auto & column_tuple = typeid_cast(column_cut.get())) + { + const auto & columns = column_tuple->getColumns(); + std::vector> data{column_tuple->size(), std::vector()}; + for (const auto & column : columns) + { + const auto & pod_array = typeid_cast(column.get())->getData(); + for (size_t i = 0; i < pod_array.size(); ++i) + data[i].push_back(pod_array[i]); + } + + if (data.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Tuple has 0 rows, {} rows expected", rows_read); + + if (!index) + index = std::make_shared>(data[0].size()); + + if (!index->reserve(unum::usearch::ceil2(index->size() + data.size()))) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); + + for (const auto & item : data) + if (auto rc = index->add(index->size(), item.data()); !rc) + throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array or Tuple column"); + + *pos += rows_read; +} + +MergeTreeIndexConditionUSearch::MergeTreeIndexConditionUSearch( + const IndexDescription & /*index_description*/, + const SelectQueryInfo & query, + const String & distance_function_, + ContextPtr context) + : ann_condition(query, context) + , distance_function(distance_function_) +{ +} + +bool MergeTreeIndexConditionUSearch::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /*idx_granule*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); +} + +bool MergeTreeIndexConditionUSearch::alwaysUnknownOrTrue() const +{ + return ann_condition.alwaysUnknownOrTrue(distance_function); +} + +std::vector MergeTreeIndexConditionUSearch::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const +{ + if (distance_function == DISTANCE_FUNCTION_L2) + return 
getUsefulRangesImpl(idx_granule); + else if (distance_function == DISTANCE_FUNCTION_COSINE) + return getUsefulRangesImpl(idx_granule); + std::unreachable(); +} + +template +std::vector MergeTreeIndexConditionUSearch::getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const +{ + const UInt64 limit = ann_condition.getLimit(); + const UInt64 index_granularity = ann_condition.getIndexGranularity(); + const std::optional comparison_distance = ann_condition.getQueryType() == ApproximateNearestNeighborInformation::Type::Where + ? std::optional(ann_condition.getComparisonDistanceForWhereQuery()) + : std::nullopt; + + if (comparison_distance && comparison_distance.value() < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to optimize query with where without distance"); + + const std::vector reference_vector = ann_condition.getReferenceVector(); + + const auto granule = std::dynamic_pointer_cast>(idx_granule); + if (granule == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); + + const USearchIndexWithSerializationPtr index = granule->index; + + if (ann_condition.getDimensions() != index->dimensions()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " + "does not match the dimension in the index ({})", + ann_condition.getDimensions(), index->dimensions()); + + auto result = index->search(reference_vector.data(), limit); + std::vector neighbors(result.size()); /// indexes of dots which were closest to the reference vector + std::vector distances(result.size()); + result.dump_to(neighbors.data(), distances.data()); + + std::vector granule_numbers; + granule_numbers.reserve(neighbors.size()); + for (size_t i = 0; i < neighbors.size(); ++i) + { + if (comparison_distance && distances[i] > comparison_distance) + continue; + granule_numbers.push_back(neighbors[i] / index_granularity); + } + + /// make unique + std::sort(granule_numbers.begin(), granule_numbers.end()); + granule_numbers.erase(std::unique(granule_numbers.begin(), granule_numbers.end()), granule_numbers.end()); + + return granule_numbers; +} + +MergeTreeIndexUSearch::MergeTreeIndexUSearch(const IndexDescription & index_, const String & distance_function_) + : IMergeTreeIndex(index_) + , distance_function(distance_function_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexUSearch::createIndexGranule() const +{ + if (distance_function == DISTANCE_FUNCTION_L2) + return std::make_shared>(index.name, index.sample_block); + else if (distance_function == DISTANCE_FUNCTION_COSINE) + return std::make_shared>(index.name, index.sample_block); + std::unreachable(); +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexUSearch::createIndexAggregator() const +{ + if (distance_function == DISTANCE_FUNCTION_L2) + return std::make_shared>(index.name, index.sample_block); + else if (distance_function == DISTANCE_FUNCTION_COSINE) + return std::make_shared>(index.name, index.sample_block); + std::unreachable(); +} + +MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const +{ + return std::make_shared(index, query, distance_function, context); +}; + +MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) +{ + static constexpr auto default_distance_function = DISTANCE_FUNCTION_L2; + String distance_function = default_distance_function; + if (!index.arguments.empty()) + distance_function = index.arguments[0].get(); + + return std::make_shared(index, distance_function); +} + +void 
usearchIndexValidator(const IndexDescription & index, bool /* attach */) +{ + /// Check number and type of USearch index arguments: + + if (index.arguments.size() > 1) + throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index must not have more than one parameters"); + + if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance function argument of USearch index must be of type String"); + + /// Check that the index is created on a single column + + if (index.column_names.size() != 1 || index.data_types.size() != 1) + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "USearch indexes must be created on a single column"); + + /// Check that a supported metric was passed as first argument + + if (!index.arguments.empty()) + { + String distance_name = index.arguments[0].get(); + if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) + throw Exception(ErrorCodes::INCORRECT_DATA, "USearch index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); + } + + /// Check data type of indexed column: + + auto throw_unsupported_underlying_column_exception = []() + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "USearch indexes can only be created on columns of type Array(Float32) and Tuple(Float32)"); + }; + + DataTypePtr data_type = index.sample_block.getDataTypes()[0]; + + if (const auto * data_type_array = typeid_cast(data_type.get())) + { + TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat32()) + throw_unsupported_underlying_column_exception(); + } + else if (const auto * data_type_tuple = typeid_cast(data_type.get())) + { + const DataTypes & inner_types = data_type_tuple->getElements(); + for (const auto & inner_type : inner_types) + { + TypeIndex nested_type_index = inner_type->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat32()) + throw_unsupported_underlying_column_exception(); + } + } + else + throw_unsupported_underlying_column_exception(); +} + +} + +#endif diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h new file mode 100644 index 00000000000..98fb05b6f1a --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -0,0 +1,111 @@ +#pragma once + +#ifdef ENABLE_USEARCH + +#include + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpass-failed" +#include +#pragma clang diagnostic pop + +namespace DB +{ + +template +class USearchIndexWithSerialization : public unum::usearch::index_dense_t +{ + using Base = unum::usearch::index_dense_t; + +public: + explicit USearchIndexWithSerialization(size_t dimensions); + void serialize(WriteBuffer & ostr) const; + void deserialize(ReadBuffer & istr); + size_t getDimensions() const; +}; + +template +using USearchIndexWithSerializationPtr = std::shared_ptr>; + + +template +struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule +{ + MergeTreeIndexGranuleUSearch(const String & index_name_, const Block & index_sample_block_); + MergeTreeIndexGranuleUSearch(const String & index_name_, const Block & index_sample_block_, USearchIndexWithSerializationPtr index_); + + ~MergeTreeIndexGranuleUSearch() override = default; + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; + + bool empty() const override { 
return !index.get(); } + + const String index_name; + const Block index_sample_block; + USearchIndexWithSerializationPtr index; +}; + + +template +struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator +{ + MergeTreeIndexAggregatorUSearch(const String & index_name_, const Block & index_sample_block); + ~MergeTreeIndexAggregatorUSearch() override = default; + + bool empty() const override { return !index || index->size() == 0; } + MergeTreeIndexGranulePtr getGranuleAndReset() override; + void update(const Block & block, size_t * pos, size_t limit) override; + + const String index_name; + const Block index_sample_block; + USearchIndexWithSerializationPtr index; +}; + + +class MergeTreeIndexConditionUSearch final : public IMergeTreeIndexConditionApproximateNearestNeighbor +{ +public: + MergeTreeIndexConditionUSearch( + const IndexDescription & index_description, + const SelectQueryInfo & query, + const String & distance_function, + ContextPtr context); + + ~MergeTreeIndexConditionUSearch() override = default; + + bool alwaysUnknownOrTrue() const override; + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; + std::vector getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const override; + +private: + template + std::vector getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const; + + const ApproximateNearestNeighborCondition ann_condition; + const String distance_function; +}; + + +class MergeTreeIndexUSearch : public IMergeTreeIndex +{ +public: + MergeTreeIndexUSearch(const IndexDescription & index_, const String & distance_function_); + + ~MergeTreeIndexUSearch() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + MergeTreeIndexAggregatorPtr createIndexAggregator() const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; + + bool mayBenefitFromIndexForIn(const ASTPtr & /*node*/) const override { return false; } + +private: + const String distance_function; +}; + +} + + +#endif + diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 6ae96d00171..322cdd35afe 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -132,6 +132,11 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerValidator("annoy", annoyIndexValidator); #endif +#ifdef ENABLE_USEARCH + registerCreator("usearch", usearchIndexCreator); + registerValidator("usearch", usearchIndexValidator); +#endif + registerCreator("inverted", invertedIndexCreator); registerValidator("inverted", invertedIndexValidator); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 1ad6b082223..40128bab9d0 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -238,6 +238,11 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index); void annoyIndexValidator(const IndexDescription & index, bool attach); #endif +#ifdef ENABLE_USEARCH +MergeTreeIndexPtr usearchIndexCreator(const IndexDescription& index); +void usearchIndexValidator(const IndexDescription& index, bool attach); +#endif + MergeTreeIndexPtr invertedIndexCreator(const IndexDescription& index); void invertedIndexValidator(const IndexDescription& index, bool attach); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 5c722eec380..c9b22c8a03e 
100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -160,7 +160,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() size_t granularity; reader->readStrict( reinterpret_cast(plain_marks.data() + i * columns_in_mark), columns_in_mark * sizeof(MarkInCompressedFile)); - readIntBinary(granularity, *reader); + readBinaryLittleEndian(granularity, *reader); } if (!reader->eof()) @@ -170,6 +170,16 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() mrk_path, marks_count, expected_uncompressed_size); } +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + std::ranges::for_each( + plain_marks, + [](auto & plain_mark) + { + plain_mark.offset_in_compressed_file = std::byteswap(plain_mark.offset_in_compressed_file); + plain_mark.offset_in_decompressed_block = std::byteswap(plain_mark.offset_in_decompressed_block); + }); +#endif + auto res = std::make_shared(plain_marks); ProfileEvents::increment(ProfileEvents::LoadedMarksCount, marks_count * columns_in_mark); diff --git a/src/Storages/MergeTree/MergeTreeMetadataCache.cpp b/src/Storages/MergeTree/MergeTreeMetadataCache.cpp deleted file mode 100644 index 32f9fc6ac62..00000000000 --- a/src/Storages/MergeTree/MergeTreeMetadataCache.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "MergeTreeMetadataCache.h" - -#if USE_ROCKSDB -#include -#include - -namespace ProfileEvents -{ - extern const Event MergeTreeMetadataCachePut; - extern const Event MergeTreeMetadataCacheGet; - extern const Event MergeTreeMetadataCacheDelete; - extern const Event MergeTreeMetadataCacheSeek; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int SYSTEM_ERROR; -} - - -std::unique_ptr MergeTreeMetadataCache::create(const String & dir, size_t size) -{ - assert(size != 0); - rocksdb::Options options; - rocksdb::BlockBasedTableOptions table_options; - rocksdb::DB * db; - - options.create_if_missing = true; - auto cache = rocksdb::NewLRUCache(size); - table_options.block_cache = cache; - options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); - rocksdb::Status status = rocksdb::DB::Open(options, dir, &db); - if (status != rocksdb::Status::OK()) - throw Exception( - ErrorCodes::SYSTEM_ERROR, - "Fail to open rocksdb path at: {} status:{}. 
You can try to remove the cache (this will not affect any table data).", - dir, - status.ToString()); - return std::make_unique(db); -} - -MergeTreeMetadataCache::Status MergeTreeMetadataCache::put(const String & key, const String & value) -{ - auto options = rocksdb::WriteOptions(); - options.sync = true; - options.disableWAL = false; - auto status = rocksdb->Put(options, key, value); - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCachePut); - return status; -} - -MergeTreeMetadataCache::Status MergeTreeMetadataCache::del(const String & key) -{ - auto options = rocksdb::WriteOptions(); - options.sync = true; - options.disableWAL = false; - auto status = rocksdb->Delete(options, key); - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheDelete); - LOG_TRACE(log, "Delete key:{} from MergeTreeMetadataCache status:{}", key, status.ToString()); - return status; -} - -MergeTreeMetadataCache::Status MergeTreeMetadataCache::get(const String & key, String & value) -{ - auto status = rocksdb->Get(rocksdb::ReadOptions(), key, &value); - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheGet); - LOG_TRACE(log, "Get key:{} from MergeTreeMetadataCache status:{}", key, status.ToString()); - return status; -} - -void MergeTreeMetadataCache::getByPrefix(const String & prefix, Strings & keys, Strings & values) -{ - auto * it = rocksdb->NewIterator(rocksdb::ReadOptions()); - rocksdb::Slice target(prefix); - for (it->Seek(target); it->Valid(); it->Next()) - { - const auto key = it->key(); - if (!key.starts_with(target)) - break; - - const auto value = it->value(); - keys.emplace_back(key.data(), key.size()); - values.emplace_back(value.data(), value.size()); - } - LOG_TRACE(log, "Seek with prefix:{} from MergeTreeMetadataCache items:{}", prefix, keys.size()); - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheSeek); - delete it; -} - -uint64_t MergeTreeMetadataCache::getEstimateNumKeys() const -{ - uint64_t keys = 0; - rocksdb->GetAggregatedIntProperty("rocksdb.estimate-num-keys", &keys); - return keys; -} - -void MergeTreeMetadataCache::shutdown() -{ - rocksdb->Close(); - rocksdb.reset(); -} - -} - -#endif diff --git a/src/Storages/MergeTree/MergeTreeMetadataCache.h b/src/Storages/MergeTree/MergeTreeMetadataCache.h deleted file mode 100644 index 57fb9ed88c4..00000000000 --- a/src/Storages/MergeTree/MergeTreeMetadataCache.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_ROCKSDB -#include -#include -#include -#include -#include - -namespace DB -{ -class MergeTreeMetadataCache -{ -public: - using Status = rocksdb::Status; - - static std::unique_ptr create(const String & dir, size_t size); - - explicit MergeTreeMetadataCache(rocksdb::DB * rocksdb_) : rocksdb{rocksdb_} - { - assert(rocksdb); - } - - MergeTreeMetadataCache(const MergeTreeMetadataCache &) = delete; - - MergeTreeMetadataCache & operator=(const MergeTreeMetadataCache &) = delete; - - Status put(const String & key, const String & value); - Status del(const String & key); - Status get(const String & key, String & value); - void getByPrefix(const String & prefix, Strings & keys, Strings & values); - uint64_t getEstimateNumKeys() const; - - void shutdown(); -private: - std::unique_ptr rocksdb; - Poco::Logger * log = &Poco::Logger::get("MergeTreeMetadataCache"); -}; - -using MergeTreeMetadataCachePtr = std::shared_ptr; -} - -#endif diff --git a/src/Storages/MergeTree/MergeTreeMutationStatus.h b/src/Storages/MergeTree/MergeTreeMutationStatus.h index 5f29b777293..b6f62e1218f 
100644 --- a/src/Storages/MergeTree/MergeTreeMutationStatus.h +++ b/src/Storages/MergeTree/MergeTreeMutationStatus.h @@ -13,20 +13,20 @@ namespace DB struct MergeTreeMutationStatus { - String id; - String command; + String id = ""; + String command = ""; time_t create_time = 0; - std::map block_numbers; + std::map block_numbers{}; /// Parts that should be mutated/merged or otherwise moved to Obsolete state for this mutation to complete. - Names parts_to_do_names; + Names parts_to_do_names = {}; /// If the mutation is done. Note that in case of ReplicatedMergeTree parts_to_do == 0 doesn't imply is_done == true. bool is_done = false; - String latest_failed_part; + String latest_failed_part = ""; time_t latest_fail_time = 0; - String latest_fail_reason; + String latest_fail_reason = ""; /// FIXME: currently unused, but would be much better to report killed mutations with this flag. bool is_killed = false; diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index a869701ae20..9b14d4a2ace 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -163,7 +163,8 @@ struct DetachedPartInfo : public MergeTreePartInfo "tmp-fetch", "covered-by-broken", "merge-not-byte-identical", - "mutate-not-byte-identical" + "mutate-not-byte-identical", + "broken-from-backup", }); static constexpr auto DETACHED_REASONS_REMOVABLE_BY_TIMEOUT = std::to_array({ @@ -175,7 +176,8 @@ struct DetachedPartInfo : public MergeTreePartInfo "deleting", "clone", "merge-not-byte-identical", - "mutate-not-byte-identical" + "mutate-not-byte-identical", + "broken-from-backup", }); /// NOTE: It may parse part info incorrectly. diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index bce33438229..ddeaf69136a 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -84,15 +84,7 @@ namespace } void operator() (const UUID & x) const { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - auto tmp_x = x.toUnderType(); - char * start = reinterpret_cast(&tmp_x); - char * end = start + sizeof(tmp_x); - std::reverse(start, end); - operator()(tmp_x); -#else operator()(x.toUnderType()); -#endif } void operator() (const IPv4 & x) const { @@ -265,12 +257,12 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const for (const Field & field : value) applyVisitor(hashing_visitor, field); - char hash_data[16]; - hash.get128(hash_data); - result.resize(32); - for (size_t i = 0; i < 16; ++i) + const auto hash_data = getSipHash128AsArray(hash); + const auto hash_size = hash_data.size(); + result.resize(hash_size * 2); + for (size_t i = 0; i < hash_size; ++i) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - writeHexByteLowercase(hash_data[16 - 1 - i], &result[2 * i]); + writeHexByteLowercase(hash_data[hash_size - 1 - i], &result[2 * i]); #else writeHexByteLowercase(hash_data[i], &result[2 * i]); #endif @@ -371,6 +363,12 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe const auto & partition_key_sample = metadata_snapshot->getPartitionKey().sample_block; size_t key_size = partition_key_sample.columns(); + // In some cases we create empty parts and then value is empty. 
+ if (value.empty()) + { + writeCString("tuple()", out); + return; + } if (key_size == 0) { writeCString("tuple()", out); diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 029558883f1..51e4cee19f8 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -208,7 +208,7 @@ bool MergeTreePartsMover::selectPartsForMove( return false; } -MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const +MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part, const WriteSettings & write_settings) const { if (moves_blocker.isCancelled()) throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); @@ -248,12 +248,13 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me else { LOG_INFO(log, "Part {} was not fetched, we are the first who move it to another disk, so we will copy it", part->name); - cloned_part_storage = part->getDataPartStorage().clonePart(path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, log); + cloned_part_storage = part->getDataPartStorage().clonePart( + path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, write_settings, log); } } else { - cloned_part_storage = part->makeCloneOnDisk(disk, MergeTreeData::MOVING_DIR_NAME); + cloned_part_storage = part->makeCloneOnDisk(disk, MergeTreeData::MOVING_DIR_NAME, write_settings); } MergeTreeDataPartBuilder builder(*data, part->name, cloned_part_storage); diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index 82fd271ee5f..5dcc364a4e9 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -65,7 +65,7 @@ public: const std::lock_guard & moving_parts_lock); /// Copies part to selected reservation in detached folder. Throws exception if part already exists. - TemporaryClonedPart clonePart(const MergeTreeMoveEntry & moving_part) const; + TemporaryClonedPart clonePart(const MergeTreeMoveEntry & moving_part, const WriteSettings & write_settings) const; /// Replaces cloned part from detached directory into active data parts set. /// Replacing part changes state to DeleteOnDestroy and will be removed from disk after destructor of diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index fbad7d2f7be..75f2fd26600 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -14,10 +13,10 @@ #include #include - namespace ProfileEvents { extern const Event MergeTreePrefetchedReadPoolInit; + extern const Event WaitPrefetchTaskMicroseconds; } namespace DB @@ -29,145 +28,130 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +bool MergeTreePrefetchedReadPool::TaskHolder::operator<(const TaskHolder & other) const +{ + chassert(task->priority >= 0); + chassert(other.task->priority >= 0); + /// With default std::priority_queue, top() returns largest element. + /// So closest to 0 will be on top with this comparator. + return task->priority > other.task->priority; /// Less is better. 
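As an aside on the `TaskHolder::operator<` just added above: `std::priority_queue` is a max-heap over `operator<`, so inverting the comparison makes `top()` return the task with the numerically smallest priority (closest to 0). A minimal standalone sketch of the same trick, with a hypothetical `Item` type standing in for `TaskHolder`:

```cpp
#include <cassert>
#include <initializer_list>
#include <queue>

struct Item
{
    int priority = 0;
    /// Inverted comparison: the smallest priority value ends up on top of the heap.
    bool operator<(const Item & other) const { return priority > other.priority; }
};

int main()
{
    std::priority_queue<Item> queue;
    for (int p : {5, 1, 3})
        queue.push(Item{p});

    assert(queue.top().priority == 1); /// "Less is better": closest to 0 wins.
    return 0;
}
```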
+} + +MergeTreePrefetchedReadPool::PrefetechedReaders::PrefetechedReaders( + MergeTreeReadTask::Readers readers_, + Priority priority_, + MergeTreePrefetchedReadPool & pool_) + : is_valid(true) + , readers(std::move(readers_)) +{ + prefetch_futures.push_back(pool_.createPrefetchedFuture(readers.main.get(), priority_)); + + for (const auto & reader : readers.prewhere) + prefetch_futures.push_back(pool_.createPrefetchedFuture(reader.get(), priority_)); +} + +void MergeTreePrefetchedReadPool::PrefetechedReaders::wait() +{ + ProfileEventTimeIncrement watch(ProfileEvents::WaitPrefetchTaskMicroseconds); + for (auto & prefetch_future : prefetch_futures) + prefetch_future.wait(); +} + +MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetechedReaders::get() +{ + SCOPE_EXIT({ is_valid = false; }); + ProfileEventTimeIncrement watch(ProfileEvents::WaitPrefetchTaskMicroseconds); + + /// First wait for completion of all futures. + for (auto & prefetch_future : prefetch_futures) + prefetch_future.wait(); + + /// Then rethrow first exception if any. + for (auto & prefetch_future : prefetch_futures) + prefetch_future.get(); + + return std::move(readers); +} + MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( - size_t threads, - size_t sum_marks_, - size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - size_t preferred_block_size_bytes_, - const MergeTreeReaderSettings & reader_settings_, - ContextPtr context_, - bool use_uncompressed_cache_, - bool is_remote_read_, - const MergeTreeSettings & storage_settings_) - : WithContext(context_) - , log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_.empty() ? "" : parts_.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) - , header(storage_snapshot_->getSampleBlockForColumns(column_names_)) - , mark_cache(context_->getGlobalContext()->getMarkCache().get()) - , uncompressed_cache(use_uncompressed_cache_ ? context_->getGlobalContext()->getUncompressedCache().get() : nullptr) - , profile_callback([this](ReadBufferFromFileBase::ProfileInfo info_) { profileFeedback(info_); }) - , index_granularity_bytes(storage_settings_.index_granularity_bytes) - , fixed_index_granularity(storage_settings_.index_granularity) - , storage_snapshot(storage_snapshot_) - , column_names(column_names_) - , virtual_column_names(virtual_column_names_) - , prewhere_info(prewhere_info_) - , actions_settings(actions_settings_) - , reader_settings(reader_settings_) - , is_remote_read(is_remote_read_) + const PoolSettings & settings_, + const ContextPtr & context_) + : MergeTreeReadPoolBase( + std::move(parts_), + storage_snapshot_, + prewhere_info_, + actions_settings_, + reader_settings_, + column_names_, + virtual_column_names_, + settings_, + context_) + , WithContext(context_) , prefetch_threadpool(getContext()->getPrefetchThreadpool()) + , log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_.empty() ? "" : parts_.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) { /// Tasks creation might also create a lost of readers - check they do not /// do any time consuming operations in ctor. 
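The `PrefetechedReaders::get()` added above follows a "wait on everything first, then call `get()`" pattern so that the first stored exception is rethrown only after all prefetch futures have settled. A generic sketch of that pattern with plain `std::future` (function name is illustrative, not the ClickHouse API):

```cpp
#include <future>
#include <vector>

/// Block until every future is ready, then surface the first stored exception, if any.
/// wait() never throws; get() rethrows the exception captured by a failed task.
void waitAllThenRethrow(std::vector<std::future<void>> & futures)
{
    for (auto & future : futures)
        future.wait();

    for (auto & future : futures)
        future.get();
}
```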
ProfileEventTimeIncrement watch(ProfileEvents::MergeTreePrefetchedReadPoolInit); - parts_infos = getPartsInfos(parts_, preferred_block_size_bytes_); - threads_tasks = createThreadsTasks(threads, sum_marks_, min_marks_for_concurrent_read_); + fillPerPartStatistics(); + fillPerThreadTasks(pool_settings.threads, pool_settings.sum_marks); } -struct MergeTreePrefetchedReadPool::PartInfo +std::future MergeTreePrefetchedReadPool::createPrefetchedFuture(IMergeTreeReader * reader, Priority priority) { - MergeTreeData::DataPartPtr data_part; - AlterConversionsPtr alter_conversions; - size_t part_index_in_query; - size_t sum_marks = 0; - MarkRanges ranges; - - NameSet column_name_set; - MergeTreeReadTaskColumns task_columns; - MergeTreeBlockSizePredictorPtr size_predictor; - - size_t approx_size_of_mark = 0; - size_t prefetch_step_marks = 0; - - size_t estimated_memory_usage_for_single_prefetch = 0; - size_t required_readers_num = 0; -}; - -std::future MergeTreePrefetchedReadPool::createPrefetchedReader( - const IMergeTreeDataPart & data_part, - const NamesAndTypesList & columns, - const AlterConversionsPtr & alter_conversions, - const MarkRanges & required_ranges, - Priority priority) const -{ - auto reader = data_part.getReader( - columns, storage_snapshot->metadata, required_ranges, - uncompressed_cache, mark_cache, alter_conversions, reader_settings, - IMergeTreeReader::ValueSizeMap{}, profile_callback); - /// In order to make a prefetch we need to wait for marks to be loaded. But we just created /// a reader (which starts loading marks in its constructor), then if we do prefetch right /// after creating a reader, it will be very inefficient. We can do prefetch for all parts /// only inside this MergeTreePrefetchedReadPool, where read tasks are created and distributed, /// and we cannot block either, therefore make prefetch inside the pool and put the future - /// into the read task (MergeTreeReadTask). When a thread calls getTask(), it will wait for - /// it (if not yet ready) after getting the task. - auto task = [=, my_reader = std::move(reader), context = getContext()]() mutable -> MergeTreeReaderPtr && + /// into the thread task. When a thread calls getTask(), it will wait for it is not ready yet. + auto task = [=, context = getContext()]() mutable { /// For async read metrics in system.query_log. 
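The `createPrefetchedFuture` shown here schedules `prefetchBeginOfRange` on the prefetch thread pool and returns a future that the read task stores and waits on later. A simplified stand-in for that scheduling shape, using `std::async` instead of ClickHouse's internal `scheduleFromThreadPool`:

```cpp
#include <future>
#include <iostream>

int main()
{
    /// Stand-in for enqueueing the prefetch on a dedicated thread pool.
    auto prefetch_future = std::async(std::launch::async, []
    {
        /// In the pool this is where reader->prefetchBeginOfRange(priority) would run.
        std::cout << "prefetch started\n";
    });

    /// The consumer keeps the future inside the task and waits on it only
    /// right before the reader is actually used.
    prefetch_future.wait();
    return 0;
}
```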
PrefetchIncrement watch(context->getAsyncReadCounters()); - - my_reader->prefetchBeginOfRange(priority); - return std::move(my_reader); + reader->prefetchBeginOfRange(priority); }; - return scheduleFromThreadPool(std::move(task), prefetch_threadpool, "ReadPrepare", priority); + + return scheduleFromThreadPool(std::move(task), prefetch_threadpool, "ReadPrepare", priority); } -void MergeTreePrefetchedReadPool::createPrefetchedReaderForTask(MergeTreeReadTask & task) const +void MergeTreePrefetchedReadPool::createPrefetchedReadersForTask(ThreadTask & task) { - if (task.reader.valid()) + if (task.readers_future.valid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Task already has a reader"); - task.reader = createPrefetchedReader(*task.data_part, task.task_columns.columns, task.alter_conversions, task.mark_ranges, task.priority); - - if (reader_settings.apply_deleted_mask && task.data_part->hasLightweightDelete()) - { - auto pre_reader = createPrefetchedReader(*task.data_part, {LightweightDeleteDescription::FILTER_COLUMN}, task.alter_conversions, task.mark_ranges, task.priority); - task.pre_reader_for_step.push_back(std::move(pre_reader)); - } - - for (const auto & pre_columns_per_step : task.task_columns.pre_columns) - { - auto pre_reader = createPrefetchedReader(*task.data_part, pre_columns_per_step, task.alter_conversions, task.mark_ranges, task.priority); - task.pre_reader_for_step.push_back(std::move(pre_reader)); - } + auto extras = getExtras(); + auto readers = MergeTreeReadTask::createReaders(task.read_info, extras, task.ranges); + task.readers_future = PrefetechedReaders(std::move(readers), task.priority, *this); } -bool MergeTreePrefetchedReadPool::TaskHolder::operator <(const TaskHolder & other) const -{ - chassert(task->priority >= 0); - chassert(other.task->priority >= 0); - return task->priority > other.task->priority; /// Less is better. - /// With default std::priority_queue, top() returns largest element. - /// So closest to 0 will be on top with this comparator. 
-} - -void MergeTreePrefetchedReadPool::startPrefetches() const +void MergeTreePrefetchedReadPool::startPrefetches() { if (prefetch_queue.empty()) return; - [[maybe_unused]] TaskHolder prev(nullptr, 0); + [[maybe_unused]] TaskHolder prev; [[maybe_unused]] const Priority highest_priority{reader_settings.read_settings.priority.value + 1}; assert(prefetch_queue.top().task->priority == highest_priority); + while (!prefetch_queue.empty()) { const auto & top = prefetch_queue.top(); - createPrefetchedReaderForTask(*top.task); + createPrefetchedReadersForTask(*top.task); #ifndef NDEBUG if (prev.task) { assert(top.task->priority >= highest_priority); if (prev.thread_id == top.thread_id) - { assert(prev.task->priority < top.task->priority); - } } prev = top; #endif @@ -175,11 +159,11 @@ void MergeTreePrefetchedReadPool::startPrefetches() const } } -MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::getTask(size_t thread) +MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::getTask(size_t task_idx, MergeTreeReadTask * previous_task) { std::lock_guard lock(mutex); - if (threads_tasks.empty()) + if (per_thread_tasks.empty()) return nullptr; if (!started_prefetches) @@ -188,112 +172,129 @@ MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::getTask(size_t thread) startPrefetches(); } - auto it = threads_tasks.find(thread); - if (it == threads_tasks.end()) - { - ThreadsTasks::iterator non_prefetched_tasks_to_steal = threads_tasks.end(); - ThreadsTasks::iterator prefetched_tasks_to_steal = threads_tasks.end(); - int64_t best_prefetched_task_priority = -1; - - /// There is no point stealing in order (like in MergeTreeReadPool, where tasks can be stolen - /// only from the next thread). Even if we steal task from the next thread, which reads from - /// the same part as we just read, it might seem that we can reuse our own reader, do some - /// seek avoiding and it will have a good result as we avoided seek (new request). But it is - /// not so, because this next task will most likely have its own reader a prefetch already on - /// the fly. (Not to mention that in fact we cannot reuse our own reader if initially we did - /// not accounted this range into range request to object storage). - for (auto thread_tasks_it = threads_tasks.begin(); thread_tasks_it != threads_tasks.end(); ++thread_tasks_it) - { - /// Prefer to steal tasks which have an initialized reader (with prefetched data). Thus we avoid - /// losing a prefetch by creating our own reader (or resusing our own reader if the part - /// is the same as last read by this thread). - auto & thread_tasks = thread_tasks_it->second; - auto task_it = std::find_if( - thread_tasks.begin(), thread_tasks.end(), - [](const auto & task) { return task->reader.valid(); }); - - if (task_it == thread_tasks.end()) - { - /// The follow back to non-prefetched task should lie on the thread which - /// has more tasks than others. - if (non_prefetched_tasks_to_steal == threads_tasks.end() - || non_prefetched_tasks_to_steal->second.size() < thread_tasks.size()) - non_prefetched_tasks_to_steal = thread_tasks_it; - } - /// Try to steal task with the best (lowest) priority (because it will be executed faster). 
- else if (prefetched_tasks_to_steal == threads_tasks.end() - || (*task_it)->priority < best_prefetched_task_priority) - { - best_prefetched_task_priority = (*task_it)->priority; - chassert(best_prefetched_task_priority >= 0); - prefetched_tasks_to_steal = thread_tasks_it; - } - } - - if (prefetched_tasks_to_steal != threads_tasks.end()) - { - auto & thread_tasks = prefetched_tasks_to_steal->second; - assert(!thread_tasks.empty()); - - auto task_it = std::find_if( - thread_tasks.begin(), thread_tasks.end(), - [](const auto & task) { return task->reader.valid(); }); - assert(task_it != thread_tasks.end()); - - auto task = std::move(*task_it); - thread_tasks.erase(task_it); - - if (thread_tasks.empty()) - threads_tasks.erase(prefetched_tasks_to_steal); - - return task; - } - - /// TODO: it also makes sense to first try to steal from the next thread if it has ranges - /// from the same part as current thread last read - to reuse the reader. - - if (non_prefetched_tasks_to_steal != threads_tasks.end()) - { - auto & thread_tasks = non_prefetched_tasks_to_steal->second; - assert(!thread_tasks.empty()); - - /// Get second half of the tasks. - const size_t total_tasks = thread_tasks.size(); - const size_t half = total_tasks / 2; - auto half_it = thread_tasks.begin() + half; - assert(half_it != thread_tasks.end()); - - /// Give them to current thread, as current thread's tasks list is empty. - auto & current_thread_tasks = threads_tasks[thread]; - current_thread_tasks.insert( - current_thread_tasks.end(), make_move_iterator(half_it), make_move_iterator(thread_tasks.end())); - - /// Erase them from the thread from which we steal. - thread_tasks.resize(half); - if (thread_tasks.empty()) - threads_tasks.erase(non_prefetched_tasks_to_steal); - - auto task = std::move(current_thread_tasks.front()); - current_thread_tasks.erase(current_thread_tasks.begin()); - if (current_thread_tasks.empty()) - threads_tasks.erase(thread); - - return task; - } - - return nullptr; - } + auto it = per_thread_tasks.find(task_idx); + if (it == per_thread_tasks.end()) + return stealTask(task_idx, previous_task); auto & thread_tasks = it->second; assert(!thread_tasks.empty()); - auto task = std::move(thread_tasks.front()); + auto thread_task = std::move(thread_tasks.front()); thread_tasks.pop_front(); if (thread_tasks.empty()) - threads_tasks.erase(it); + per_thread_tasks.erase(it); - return task; + return createTask(*thread_task, previous_task); +} + +MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::stealTask(size_t thread, MergeTreeReadTask * previous_task) +{ + auto non_prefetched_tasks_to_steal = per_thread_tasks.end(); + auto prefetched_tasks_to_steal = per_thread_tasks.end(); + int64_t best_prefetched_task_priority = -1; + + /// There is no point stealing in order (like in MergeTreeReadPool, where tasks can be stolen + /// only from the next thread). Even if we steal task from the next thread, which reads from + /// the same part as we just read, it might seem that we can reuse our own reader, do some + /// seek avoiding and it will have a good result as we avoided seek (new request). But it is + /// not so, because this next task will most likely have its own reader a prefetch already on + /// the fly. (Not to mention that in fact we cannot reuse our own reader if initially we did + /// not accounted this range into range request to object storage). 
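The stealing policy described in this comment block boils down to: prefer a task whose prefetch is already in flight, and only fall back to the thread with the largest non-prefetched backlog. A reduced sketch of that decision over per-thread deques; types and the single-task steal are simplifications of the real code, which takes the second half of the victim's backlog:

```cpp
#include <deque>
#include <map>
#include <optional>

struct Task { bool prefetched = false; };
using TasksPerThread = std::map<size_t, std::deque<Task>>; /// thread id -> backlog

std::optional<Task> steal(TasksPerThread & per_thread_tasks)
{
    /// First pass: take a task with a prefetch already started, so it is not wasted.
    for (auto & [thread_id, backlog] : per_thread_tasks)
        for (auto it = backlog.begin(); it != backlog.end(); ++it)
            if (it->prefetched)
            {
                Task stolen = *it;
                backlog.erase(it);
                return stolen;
            }

    /// Otherwise steal from the thread with the largest remaining backlog.
    auto victim = per_thread_tasks.end();
    for (auto it = per_thread_tasks.begin(); it != per_thread_tasks.end(); ++it)
        if (victim == per_thread_tasks.end() || it->second.size() > victim->second.size())
            victim = it;

    if (victim == per_thread_tasks.end() || victim->second.empty())
        return std::nullopt;

    Task stolen = victim->second.front();
    victim->second.pop_front();
    return stolen;
}
```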
+ for (auto thread_tasks_it = per_thread_tasks.begin(); thread_tasks_it != per_thread_tasks.end(); ++thread_tasks_it) + { + /// Prefer to steal tasks which have an initialized reader (with prefetched data). Thus we avoid + /// losing a prefetch by creating our own reader (or resusing our own reader if the part + /// is the same as last read by this thread). + auto & thread_tasks = thread_tasks_it->second; + + auto task_it = std::find_if( + thread_tasks.begin(), thread_tasks.end(), + [](const auto & task) { return task->readers_future.valid(); }); + + if (task_it == thread_tasks.end()) + { + /// The follow back to non-prefetched task should lie on the thread which + /// has more tasks than others. + if (non_prefetched_tasks_to_steal == per_thread_tasks.end() + || non_prefetched_tasks_to_steal->second.size() < thread_tasks.size()) + non_prefetched_tasks_to_steal = thread_tasks_it; + } + /// Try to steal task with the best (lowest) priority (because it will be executed faster). + else if (prefetched_tasks_to_steal == per_thread_tasks.end() + || (*task_it)->priority < best_prefetched_task_priority) + { + best_prefetched_task_priority = (*task_it)->priority; + chassert(best_prefetched_task_priority >= 0); + prefetched_tasks_to_steal = thread_tasks_it; + } + } + + if (prefetched_tasks_to_steal != per_thread_tasks.end()) + { + auto & thread_tasks = prefetched_tasks_to_steal->second; + assert(!thread_tasks.empty()); + + auto task_it = std::find_if( + thread_tasks.begin(), thread_tasks.end(), + [](const auto & task) { return task->readers_future.valid(); }); + + assert(task_it != thread_tasks.end()); + auto thread_task = std::move(*task_it); + thread_tasks.erase(task_it); + + if (thread_tasks.empty()) + per_thread_tasks.erase(prefetched_tasks_to_steal); + + return createTask(*thread_task, previous_task); + } + + /// TODO: it also makes sense to first try to steal from the next thread if it has ranges + /// from the same part as current thread last read - to reuse the reader. + if (non_prefetched_tasks_to_steal != per_thread_tasks.end()) + { + auto & thread_tasks = non_prefetched_tasks_to_steal->second; + assert(!thread_tasks.empty()); + + /// Get second half of the tasks. + const size_t total_tasks = thread_tasks.size(); + const size_t half = total_tasks / 2; + auto half_it = thread_tasks.begin() + half; + assert(half_it != thread_tasks.end()); + + /// Give them to current thread, as current thread's tasks list is empty. + auto & current_thread_tasks = per_thread_tasks[thread]; + current_thread_tasks.insert( + current_thread_tasks.end(), make_move_iterator(half_it), make_move_iterator(thread_tasks.end())); + + /// Erase them from the thread from which we steal. + thread_tasks.resize(half); + if (thread_tasks.empty()) + per_thread_tasks.erase(non_prefetched_tasks_to_steal); + + auto thread_task = std::move(current_thread_tasks.front()); + current_thread_tasks.erase(current_thread_tasks.begin()); + if (current_thread_tasks.empty()) + per_thread_tasks.erase(thread); + + return createTask(*thread_task, previous_task); + } + + return nullptr; +} + +MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::createTask(ThreadTask & task, MergeTreeReadTask * previous_task) +{ + if (task.readers_future.valid()) + { + auto size_predictor = task.read_info->shared_size_predictor + ? 
std::make_unique(*task.read_info->shared_size_predictor) + : nullptr; + + return std::make_unique(task.read_info, task.readers_future.get(), task.ranges, std::move(size_predictor)); + } + + return MergeTreeReadPoolBase::createTask(task.read_info, task.ranges, previous_task); } size_t getApproximateSizeOfGranule(const IMergeTreeDataPart & part, const Names & columns_to_read) @@ -304,151 +305,111 @@ size_t getApproximateSizeOfGranule(const IMergeTreeDataPart & part, const Names return columns_size.data_compressed / part.getMarksCount(); } -MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInfos( - const RangesInDataParts & parts, size_t preferred_block_size_bytes) const +void MergeTreePrefetchedReadPool::fillPerPartStatistics() { - PartsInfos result; - Block sample_block = storage_snapshot->metadata->getSampleBlock(); + per_part_statistics.clear(); + per_part_statistics.reserve(parts_ranges.size()); const auto & settings = getContext()->getSettingsRef(); - const bool predict_block_size_bytes = preferred_block_size_bytes > 0; - for (const auto & part : parts) + for (size_t i = 0; i < parts_ranges.size(); ++i) { - auto part_info = std::make_unique(); - - part_info->data_part = part.data_part; - part_info->alter_conversions = part.alter_conversions; - part_info->part_index_in_query = part.part_index_in_query; - part_info->ranges = part.ranges; - std::sort(part_info->ranges.begin(), part_info->ranges.end()); - - LoadedMergeTreeDataPartInfoForReader part_reader_info(part.data_part, part_info->alter_conversions); + auto & part_stat = per_part_statistics.emplace_back(); + const auto & read_info = *per_part_infos[i]; /// Sum up total size of all mark ranges in a data part. - for (const auto & range : part.ranges) - part_info->sum_marks += range.end - range.begin; + for (const auto & range : parts_ranges[i].ranges) + part_stat.sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names; - const auto task_columns = getReadTaskColumns( - part_reader_info, - storage_snapshot, - column_names, - virtual_column_names, - prewhere_info, - actions_settings, - reader_settings, - /* with_subcolumns */ true); + part_stat.approx_size_of_mark = getApproximateSizeOfGranule(*read_info.data_part, columns); - part_info->size_predictor = !predict_block_size_bytes - ? nullptr - : IMergeTreeSelectAlgorithm::getSizePredictor(part.data_part, task_columns, sample_block); - - /// Will be used to distinguish between PREWHERE and WHERE columns when applying filter. - const auto & required_column_names = task_columns.columns.getNames(); - part_info->column_name_set = {required_column_names.begin(), required_column_names.end()}; - part_info->task_columns = task_columns; + auto update_stat_for_column = [&](const auto & column_name) + { + size_t column_size = read_info.data_part->getColumnSize(column_name).data_compressed; + part_stat.estimated_memory_usage_for_single_prefetch += std::min(column_size, settings.prefetch_buffer_size); + ++part_stat.required_readers_num; + }; /// adjustBufferSize(), which is done in MergeTreeReaderStream and MergeTreeReaderCompact, /// lowers buffer size if file size (or required read range) is less. So we know that the /// settings.prefetch_buffer_size will be lowered there, therefore we account it here as well. 
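The per-part statistics filled here estimate the memory of one prefetch by summing, per column, the smaller of the column's compressed size and the `prefetch_buffer_size` setting, mirroring the buffer-size lowering mentioned in the comment above. A small sketch of that accounting with hypothetical names:

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

struct PrefetchEstimate
{
    size_t estimated_memory = 0;
    size_t required_readers = 0;
};

/// Each column gets its own read buffer, capped by both the configured buffer size
/// and the amount of compressed data that actually exists for that column.
PrefetchEstimate estimatePrefetch(const std::vector<size_t> & column_compressed_sizes, size_t prefetch_buffer_size)
{
    PrefetchEstimate estimate;
    for (size_t column_size : column_compressed_sizes)
    {
        estimate.estimated_memory += std::min(column_size, prefetch_buffer_size);
        ++estimate.required_readers;
    }
    return estimate;
}
```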
/// But here we make a more approximate lowering (because we do not have loaded marks yet), /// while in adjustBufferSize it will be presize. - for (const auto & col : task_columns.columns) - { - const auto col_size = part.data_part->getColumnSize(col.name).data_compressed; - part_info->estimated_memory_usage_for_single_prefetch += std::min(col_size, settings.prefetch_buffer_size); - ++part_info->required_readers_num; - } - if (reader_settings.apply_deleted_mask && part.data_part->hasLightweightDelete()) - { - const auto col_size = part.data_part->getColumnSize( - LightweightDeleteDescription::FILTER_COLUMN.name).data_compressed; - part_info->estimated_memory_usage_for_single_prefetch += std::min(col_size, settings.prefetch_buffer_size); - ++part_info->required_readers_num; - } - if (prewhere_info) - { - for (const auto & columns : task_columns.pre_columns) - { - for (const auto & col : columns) - { - const size_t col_size = part.data_part->getColumnSize(col.name).data_compressed; - part_info->estimated_memory_usage_for_single_prefetch += std::min(col_size, settings.prefetch_buffer_size); - ++part_info->required_readers_num; - } - } - } + for (const auto & column : read_info.task_columns.columns) + update_stat_for_column(column.name); - result.push_back(std::move(part_info)); + if (reader_settings.apply_deleted_mask && read_info.data_part->hasLightweightDelete()) + update_stat_for_column(LightweightDeleteDescription::FILTER_COLUMN.name); + + for (const auto & pre_columns : read_info.task_columns.pre_columns) + for (const auto & column : pre_columns) + update_stat_for_column(column.name); } - - return result; } -MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThreadsTasks( - size_t threads, size_t sum_marks, size_t /* min_marks_for_concurrent_read */) const +void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_marks) { - if (parts_infos.empty()) - return {}; + if (per_part_infos.empty()) + return; const auto & context = getContext(); const auto & settings = context->getSettingsRef(); size_t total_size_approx = 0; - for (const auto & part : parts_infos) - { - total_size_approx += part->sum_marks * part->approx_size_of_mark; - } + for (const auto & part : per_part_statistics) + total_size_approx += part.sum_marks * part.approx_size_of_mark; size_t min_prefetch_step_marks = 0; - - for (const auto & part : parts_infos) + for (size_t i = 0; i < per_part_infos.size(); ++i) { + auto & part_stat = per_part_statistics[i]; + if (settings.filesystem_prefetch_step_marks) { - part->prefetch_step_marks = settings.filesystem_prefetch_step_marks; + part_stat.prefetch_step_marks = settings.filesystem_prefetch_step_marks; } - else if (settings.filesystem_prefetch_step_bytes && part->approx_size_of_mark) + else if (settings.filesystem_prefetch_step_bytes && part_stat.approx_size_of_mark) { - part->prefetch_step_marks = std::max( - 1, static_cast(std::round(static_cast(settings.filesystem_prefetch_step_bytes) / part->approx_size_of_mark))); + part_stat.prefetch_step_marks = std::max( + 1, static_cast(std::round(static_cast(settings.filesystem_prefetch_step_bytes) / part_stat.approx_size_of_mark))); } /// This limit is important to avoid spikes of slow aws getObject requests when parallelizing within one file. /// (The default is taken from here https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/use-byte-range-fetches.html). 
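The byte-based settings in this block (`filesystem_prefetch_step_bytes`, `filesystem_prefetch_min_bytes_for_single_read_task`) are translated into marks by dividing by the approximate compressed size of one mark. A sketch of the conversion used for the prefetch step; the function name is illustrative:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>

/// Convert a prefetch step given in bytes into a step in marks, never less than one mark.
/// Returns 0 when the setting is disabled or the mark size is unknown, so the caller
/// can keep its default / minimum step instead.
size_t prefetchStepInMarks(size_t step_bytes, size_t approx_size_of_mark)
{
    if (step_bytes == 0 || approx_size_of_mark == 0)
        return 0;

    return std::max<size_t>(
        1, static_cast<size_t>(std::round(static_cast<double>(step_bytes) / approx_size_of_mark)));
}
```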
- if (part->approx_size_of_mark + if (part_stat.approx_size_of_mark && settings.filesystem_prefetch_min_bytes_for_single_read_task - && part->approx_size_of_mark < settings.filesystem_prefetch_min_bytes_for_single_read_task) + && part_stat.approx_size_of_mark < settings.filesystem_prefetch_min_bytes_for_single_read_task) { const size_t min_prefetch_step_marks_by_total_cols = static_cast( - std::ceil(static_cast(settings.filesystem_prefetch_min_bytes_for_single_read_task) / part->approx_size_of_mark)); + std::ceil(static_cast(settings.filesystem_prefetch_min_bytes_for_single_read_task) / part_stat.approx_size_of_mark)); + /// At least one task to start working on it right now and another one to prefetch in the meantime. const size_t new_min_prefetch_step_marks = std::min(min_prefetch_step_marks_by_total_cols, sum_marks / threads / 2); if (min_prefetch_step_marks < new_min_prefetch_step_marks) { LOG_DEBUG(log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); - min_prefetch_step_marks = new_min_prefetch_step_marks; } } - if (part->prefetch_step_marks < min_prefetch_step_marks) + if (part_stat.prefetch_step_marks < min_prefetch_step_marks) { - LOG_DEBUG(log, "Increasing prefetch step from {} to {}", part->prefetch_step_marks, min_prefetch_step_marks); - - part->prefetch_step_marks = min_prefetch_step_marks; + LOG_DEBUG(log, "Increasing prefetch step from {} to {}", part_stat.prefetch_step_marks, min_prefetch_step_marks); + part_stat.prefetch_step_marks = min_prefetch_step_marks; } LOG_DEBUG( log, "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", - part->data_part->name, - part->sum_marks, - part->approx_size_of_mark, + parts_ranges[i].data_part->name, + part_stat.sum_marks, + part_stat.approx_size_of_mark, settings.filesystem_prefetch_step_bytes, - part->prefetch_step_marks, - toString(part->ranges)); + part_stat.prefetch_step_marks, + toString(parts_ranges[i].ranges)); } const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; @@ -466,13 +427,24 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr size_t allowed_memory_usage = settings.filesystem_prefetch_max_memory_usage; if (!allowed_memory_usage) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `filesystem_prefetch_max_memory_usage` must be non-zero"); + std::optional allowed_prefetches_num = settings.filesystem_prefetches_limit ? std::optional(settings.filesystem_prefetches_limit) : std::nullopt; - ThreadsTasks result_threads_tasks; + per_thread_tasks.clear(); size_t total_tasks = 0; - for (size_t i = 0, part_idx = 0; i < threads && part_idx < parts_infos.size(); ++i) + + /// Make a copy to modify ranges. + std::vector per_part_ranges; + per_part_ranges.reserve(parts_ranges.size()); + for (const auto & part_with_ranges : parts_ranges) + { + auto & part_ranges = per_part_ranges.emplace_back(part_with_ranges.ranges); + std::sort(part_ranges.begin(), part_ranges.end()); + } + + for (size_t i = 0, part_idx = 0; i < threads && part_idx < per_part_infos.size(); ++i) { int64_t need_marks = min_marks_per_thread; @@ -483,119 +455,102 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr /// reads from pool which are from reader. 
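The `min_marks_per_thread = (sum_marks - 1) / threads + 1` computed in this chunk is the usual integer ceiling-division idiom, so every thread gets a workload and no marks are lost to truncation. Spelled out as a tiny standalone example:

```cpp
#include <cassert>
#include <cstddef>

/// Ceiling division without floating point: (a - 1) / b + 1, valid for a > 0.
size_t minMarksPerThread(size_t sum_marks, size_t threads)
{
    assert(threads > 0);
    if (sum_marks == 0)
        return 0;
    return (sum_marks - 1) / threads + 1;
}

int main()
{
    assert(minMarksPerThread(10, 3) == 4); /// 3 threads cover 10 marks as 4 + 4 + 2.
    assert(minMarksPerThread(9, 3) == 3);
    return 0;
}
```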
Priority priority{reader_settings.read_settings.priority.value + 1}; - while (need_marks > 0 && part_idx < parts_infos.size()) + while (need_marks > 0 && part_idx < per_part_infos.size()) { - auto & part = *parts_infos[part_idx]; - size_t & marks_in_part = part.sum_marks; + auto & part_stat = per_part_statistics[part_idx]; + auto & part_ranges = per_part_ranges[part_idx]; - if (marks_in_part == 0) + if (part_stat.sum_marks == 0) { ++part_idx; continue; } MarkRanges ranges_to_get_from_part; - size_t marks_to_get_from_part = std::min(need_marks, marks_in_part); + size_t marks_to_get_from_part = std::min(need_marks, part_stat.sum_marks); /// Split by prefetch step even if !allow_prefetch below. Because it will allow /// to make a better distribution of tasks which did not fill into memory limit /// or prefetches limit through tasks stealing. - if (part.prefetch_step_marks) + if (part_stat.prefetch_step_marks) { - marks_to_get_from_part = std::min(marks_to_get_from_part, part.prefetch_step_marks); + marks_to_get_from_part = std::min(marks_to_get_from_part, part_stat.prefetch_step_marks); } - if (marks_in_part == marks_to_get_from_part) + if (part_stat.sum_marks == marks_to_get_from_part) { - ranges_to_get_from_part = part.ranges; + ranges_to_get_from_part = part_ranges; } else { - if (part.sum_marks < marks_to_get_from_part) + if (part_stat.sum_marks < marks_to_get_from_part) { throw Exception( ErrorCodes::LOGICAL_ERROR, "Requested {} marks from part {}, but part has only {} marks", - marks_to_get_from_part, part.data_part->name, part.sum_marks); + marks_to_get_from_part, per_part_infos[part_idx]->data_part->name, part_stat.sum_marks); } - size_t get_marks_num = marks_to_get_from_part; - while (get_marks_num > 0) + size_t num_marks_to_get = marks_to_get_from_part; + while (num_marks_to_get > 0) { - MarkRange & range = part.ranges.front(); + MarkRange & range = part_ranges.front(); const size_t marks_in_range = range.end - range.begin; - const size_t marks_to_get_from_range = std::min(marks_in_range, get_marks_num); - get_marks_num -= marks_to_get_from_range; + const size_t marks_to_get_from_range = std::min(marks_in_range, num_marks_to_get); + num_marks_to_get -= marks_to_get_from_range; ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); range.begin += marks_to_get_from_range; if (range.begin == range.end) { - part.ranges.pop_front(); + part_ranges.pop_front(); } - else if (!get_marks_num && part.prefetch_step_marks && range.end - range.begin < part.prefetch_step_marks) + else if (!num_marks_to_get && part_stat.prefetch_step_marks && range.end - range.begin < part_stat.prefetch_step_marks) { - /// We already have `get_marks_num` marks, but current mark range has + /// We already have `num_marks_to_get` marks, but current mark range has /// less than `prefetch_step_marks` marks, then add them too. ranges_to_get_from_part.emplace_back(range.begin, range.end); marks_to_get_from_part += range.end - range.begin; - part.ranges.pop_front(); + part_ranges.pop_front(); } } } need_marks -= marks_to_get_from_part; sum_marks -= marks_to_get_from_part; - marks_in_part -= marks_to_get_from_part; - - auto curr_task_size_predictor = !part.size_predictor ? 
nullptr - : std::make_unique(*part.size_predictor); /// make a copy - - auto read_task = std::make_unique( - part.data_part, - part.alter_conversions, - ranges_to_get_from_part, - part.part_index_in_query, - part.column_name_set, - part.task_columns, - std::move(curr_task_size_predictor)); - - read_task->priority = priority; + part_stat.sum_marks -= marks_to_get_from_part; bool allow_prefetch = false; if (allowed_memory_usage - && (allowed_prefetches_num.has_value() == false || allowed_prefetches_num.value() > 0)) + && (!allowed_prefetches_num.has_value() || allowed_prefetches_num.value() > 0)) { - allow_prefetch = part.estimated_memory_usage_for_single_prefetch <= allowed_memory_usage - && (allowed_prefetches_num.has_value() == false - || part.required_readers_num <= allowed_prefetches_num.value()); + allow_prefetch = part_stat.estimated_memory_usage_for_single_prefetch <= allowed_memory_usage + && (!allowed_prefetches_num.has_value() || part_stat.required_readers_num <= allowed_prefetches_num.value()); if (allow_prefetch) { - allowed_memory_usage -= part.estimated_memory_usage_for_single_prefetch; + allowed_memory_usage -= part_stat.estimated_memory_usage_for_single_prefetch; if (allowed_prefetches_num.has_value()) - *allowed_prefetches_num -= part.required_readers_num; + *allowed_prefetches_num -= part_stat.required_readers_num; } } + auto thread_task = std::make_unique(per_part_infos[part_idx], ranges_to_get_from_part, priority); if (allow_prefetch) - { - prefetch_queue.emplace(TaskHolder(read_task.get(), i)); - } - ++priority.value; + prefetch_queue.emplace(TaskHolder{thread_task.get(), i}); - result_threads_tasks[i].push_back(std::move(read_task)); + per_thread_tasks[i].push_back(std::move(thread_task)); + + ++priority.value; ++total_tasks; } } - LOG_TEST(log, "Result tasks {} for {} threads: {}", total_tasks, threads, dumpTasks(result_threads_tasks)); - - return result_threads_tasks; + LOG_TEST(log, "Result tasks {} for {} threads: {}", total_tasks, threads, dumpTasks(per_thread_tasks)); } -std::string MergeTreePrefetchedReadPool::dumpTasks(const ThreadsTasks & tasks) +std::string MergeTreePrefetchedReadPool::dumpTasks(const TasksPerThread & tasks) { WriteBufferFromOwnString result; for (const auto & [thread_id, thread_tasks] : tasks) @@ -608,9 +563,9 @@ std::string MergeTreePrefetchedReadPool::dumpTasks(const ThreadsTasks & tasks) { result << '\t'; result << ++no << ": "; - result << "reader: " << task->reader.valid() << ", "; - result << "part: " << task->data_part->name << ", "; - result << "ranges: " << toString(task->mark_ranges); + result << "reader future: " << task->readers_future.valid() << ", "; + result << "part: " << task->read_info->data_part->name << ", "; + result << "ranges: " << toString(task->ranges); } } } diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index ae97b32c7f9..f530879f86a 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -1,9 +1,6 @@ #pragma once - +#include #include -#include -#include -#include #include #include #include @@ -16,99 +13,110 @@ using MergeTreeReaderPtr = std::unique_ptr; /// A class which is responsible for creating read tasks /// which are later taken by readers via getTask method. /// Does prefetching for the read tasks it creates. 
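The `allow_prefetch` logic earlier in this hunk gates each task against two shared budgets: the `filesystem_prefetch_max_memory_usage` bytes and the optional `filesystem_prefetches_limit` count, charging both when a task is accepted. A stripped-down sketch of that gate (struct and method names are assumptions for illustration):

```cpp
#include <cstddef>
#include <optional>

struct PrefetchBudget
{
    size_t memory_left = 0;
    std::optional<size_t> prefetches_left; /// nullopt means "no limit".

    /// Returns true and charges both budgets if the task fits; otherwise leaves them untouched.
    bool tryReserve(size_t estimated_memory, size_t readers_needed)
    {
        if (estimated_memory > memory_left)
            return false;
        if (prefetches_left && readers_needed > *prefetches_left)
            return false;

        memory_left -= estimated_memory;
        if (prefetches_left)
            *prefetches_left -= readers_needed;
        return true;
    }
};
```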
-class MergeTreePrefetchedReadPool : public IMergeTreeReadPool, private WithContext +class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithContext { public: MergeTreePrefetchedReadPool( - size_t threads, - size_t sum_marks_, - size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - size_t preferred_block_size_bytes_, - const MergeTreeReaderSettings & reader_settings_, - ContextPtr context_, - bool use_uncompressed_cache_, - bool is_remote_read_, - const MergeTreeSettings & storage_settings_); + const PoolSettings & settings_, + const ContextPtr & context_); - MergeTreeReadTaskPtr getTask(size_t thread) override; + String getName() const override { return "PrefetchedReadPool"; } + bool preservesOrderOfRanges() const override { return false; } + MergeTreeReadTaskPtr getTask(size_t task_idx, MergeTreeReadTask * previous_task) override; void profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {} - Block getHeader() const override { return header; } - static bool checkReadMethodAllowed(LocalFSReadMethod method); static bool checkReadMethodAllowed(RemoteFSReadMethod method); private: - struct PartInfo; - using PartInfoPtr = std::shared_ptr; - using PartsInfos = std::vector; - using MergeTreeReadTaskPtr = std::unique_ptr; - using ThreadTasks = std::deque; - using ThreadsTasks = std::map; + struct PartStatistic + { + size_t sum_marks = 0; - std::future createPrefetchedReader( - const IMergeTreeDataPart & data_part, - const NamesAndTypesList & columns, - const AlterConversionsPtr & alter_conversions, - const MarkRanges & required_ranges, - Priority priority) const; + size_t approx_size_of_mark = 0; + size_t prefetch_step_marks = 0; - void createPrefetchedReaderForTask(MergeTreeReadTask & task) const; + size_t estimated_memory_usage_for_single_prefetch = 0; + size_t required_readers_num = 0; + }; - size_t getApproxSizeOfGranule(const IMergeTreeDataPart & part) const; + class PrefetechedReaders + { + public: + PrefetechedReaders() = default; + PrefetechedReaders(MergeTreeReadTask::Readers readers_, Priority priority_, MergeTreePrefetchedReadPool & pool_); - PartsInfos getPartsInfos(const RangesInDataParts & parts, size_t preferred_block_size_bytes) const; + void wait(); + MergeTreeReadTask::Readers get(); + bool valid() const { return is_valid; } - ThreadsTasks createThreadsTasks( - size_t threads, - size_t sum_marks, - size_t min_marks_for_concurrent_read) const; + private: + bool is_valid = false; + MergeTreeReadTask::Readers readers; + std::vector> prefetch_futures; + }; - void startPrefetches() const; + struct ThreadTask + { + using InfoPtr = MergeTreeReadTask::InfoPtr; - static std::string dumpTasks(const ThreadsTasks & tasks); + ThreadTask(InfoPtr read_info_, MarkRanges ranges_, Priority priority_) + : read_info(std::move(read_info_)), ranges(std::move(ranges_)), priority(priority_) + { + } - Poco::Logger * log; + ~ThreadTask() + { + if (readers_future.valid()) + readers_future.wait(); + } - Block header; - MarkCache * mark_cache; - UncompressedCache * uncompressed_cache; - ReadBufferFromFileBase::ProfileCallback profile_callback; - size_t index_granularity_bytes; - size_t fixed_index_granularity; - - StorageSnapshotPtr storage_snapshot; - const Names column_names; - const Names 
virtual_column_names; - PrewhereInfoPtr prewhere_info; - const ExpressionActionsSettings actions_settings; - const MergeTreeReaderSettings reader_settings; - RangesInDataParts parts_ranges; - - [[ maybe_unused ]] const bool is_remote_read; - ThreadPool & prefetch_threadpool; - - PartsInfos parts_infos; - - ThreadsTasks threads_tasks; - std::mutex mutex; + InfoPtr read_info; + MarkRanges ranges; + Priority priority; + PrefetechedReaders readers_future; + }; struct TaskHolder { - explicit TaskHolder(MergeTreeReadTask * task_, size_t thread_id_) : task(task_), thread_id(thread_id_) {} - MergeTreeReadTask * task; - size_t thread_id; - bool operator <(const TaskHolder & other) const; + ThreadTask * task = nullptr; + size_t thread_id = 0; + bool operator<(const TaskHolder & other) const; }; - mutable std::priority_queue prefetch_queue; /// the smallest on top + + using ThreadTaskPtr = std::unique_ptr; + using ThreadTasks = std::deque; + using TasksPerThread = std::map; + using PartStatistics = std::vector; + + void fillPerPartStatistics(); + void fillPerThreadTasks(size_t threads, size_t sum_marks); + + void startPrefetches(); + void createPrefetchedReadersForTask(ThreadTask & task); + std::future createPrefetchedFuture(IMergeTreeReader * reader, Priority priority); + + MergeTreeReadTaskPtr stealTask(size_t thread, MergeTreeReadTask * previous_task); + MergeTreeReadTaskPtr createTask(ThreadTask & thread_task, MergeTreeReadTask * previous_task); + + static std::string dumpTasks(const TasksPerThread & tasks); + + mutable std::mutex mutex; + ThreadPool & prefetch_threadpool; + + PartStatistics per_part_statistics; + TasksPerThread per_thread_tasks; + std::priority_queue prefetch_queue; /// the smallest on top bool started_prefetches = false; + Poco::Logger * log; /// A struct which allows to track max number of tasks which were in the /// threadpool simultaneously (similar to CurrentMetrics, but the result diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 2ab90189f9d..8ed7a9d8707 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -1,5 +1,6 @@ +#include "Storages/MergeTree/MergeTreeBlockReadUtils.h" +#include "Storages/MergeTree/MergeTreeReadTask.h" #include -#include #include #include #include @@ -32,9 +33,6 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column } MergeTreeReadPool::MergeTreeReadPool( - size_t threads_, - size_t sum_marks_, - size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -42,29 +40,22 @@ MergeTreeReadPool::MergeTreeReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - ContextPtr context_, - bool do_not_steal_tasks_) - : storage_snapshot(storage_snapshot_) - , column_names(column_names_) - , virtual_column_names(virtual_column_names_) - , min_marks_for_concurrent_read(min_marks_for_concurrent_read_) - , prewhere_info(prewhere_info_) - , actions_settings(actions_settings_) - , reader_settings(reader_settings_) - , parts_ranges(std::move(parts_)) - , predict_block_size_bytes(context_->getSettingsRef().preferred_block_size_bytes > 0) - , do_not_steal_tasks(do_not_steal_tasks_) - , merge_tree_use_const_size_tasks_for_remote_reading(context_->getSettingsRef().merge_tree_use_const_size_tasks_for_remote_reading) + const PoolSettings & settings_, + const 
ContextPtr & context_) + : MergeTreeReadPoolBase( + std::move(parts_), + storage_snapshot_, + prewhere_info_, + actions_settings_, + reader_settings_, + column_names_, + virtual_column_names_, + settings_, + context_) + , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) , backoff_settings{context_->getSettingsRef()} - , backoff_state{threads_} + , backoff_state{pool_settings.threads} { - /// parts don't contain duplicate MergeTreeDataPart's. - const auto per_part_sum_marks = fillPerPartInfo( - parts_ranges, storage_snapshot, is_part_on_remote_disk, - predict_block_size_bytes, - column_names, virtual_column_names, prewhere_info, - actions_settings, reader_settings, per_part_params); - if (std::ranges::count(is_part_on_remote_disk, true)) { const auto & settings = context_->getSettingsRef(); @@ -73,8 +64,11 @@ MergeTreeReadPool::MergeTreeReadPool( size_t total_marks = 0; for (const auto & part : parts_ranges) { - total_compressed_bytes += getApproxSizeOfPart( - *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names_; + + total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns); total_marks += part.getMarksCount(); } @@ -83,118 +77,60 @@ MergeTreeReadPool::MergeTreeReadPool( const auto min_bytes_per_task = settings.merge_tree_min_bytes_per_task_for_remote_reading; const auto avg_mark_bytes = std::max(total_compressed_bytes / total_marks, 1); /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. - const auto heuristic_min_marks = std::min(total_marks / threads_, min_bytes_per_task / avg_mark_bytes); + const auto heuristic_min_marks = std::min(total_marks / pool_settings.threads, min_bytes_per_task / avg_mark_bytes); + if (heuristic_min_marks > min_marks_for_concurrent_read) - { min_marks_for_concurrent_read = heuristic_min_marks; - } } } - fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges); + fillPerThreadInfo(pool_settings.threads, pool_settings.sum_marks); } -std::vector MergeTreeReadPool::fillPerPartInfo( - const RangesInDataParts & parts, - const StorageSnapshotPtr & storage_snapshot, - std::vector & is_part_on_remote_disk, - bool & predict_block_size_bytes, - const Names & column_names, - const Names & virtual_column_names, - const PrewhereInfoPtr & prewhere_info, - const ExpressionActionsSettings & actions_settings, - const MergeTreeReaderSettings & reader_settings, - std::vector & per_part_params) -{ - std::vector per_part_sum_marks; - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - is_part_on_remote_disk.resize(parts.size()); - - for (const auto i : collections::range(0, parts.size())) - { - const auto & part = parts[i]; -#ifndef NDEBUG - assertSortedAndNonIntersecting(part.ranges); -#endif - - bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk(); - is_part_on_remote_disk[i] = part_on_remote_disk; - - /// Read marks for every data part. 
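The constructor code in this hunk derives a minimum task size for remote reads from `merge_tree_min_bytes_per_task_for_remote_reading`: the byte target is divided by the average mark size, then capped at `total_marks / threads` so that enough tasks remain for stealing. A sketch of that heuristic with hypothetical inputs:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>

/// Minimum marks per read task for remote disks, capped so every thread still gets work.
size_t heuristicMinMarks(size_t total_marks, size_t total_compressed_bytes, size_t threads, size_t min_bytes_per_task)
{
    assert(threads > 0);
    const size_t avg_mark_bytes = std::max<size_t>(total_compressed_bytes / std::max<size_t>(total_marks, 1), 1);
    return std::min(total_marks / threads, min_bytes_per_task / avg_mark_bytes);
}
```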
- size_t sum_marks = 0; - for (const auto & range : part.ranges) - sum_marks += range.end - range.begin; - - per_part_sum_marks.push_back(sum_marks); - - auto & per_part = per_part_params.emplace_back(); - per_part.data_part = part; - - LoadedMergeTreeDataPartInfoForReader part_info(part.data_part, part.alter_conversions); - auto task_columns = getReadTaskColumns( - part_info, storage_snapshot, column_names, virtual_column_names, - prewhere_info, actions_settings, - reader_settings, /*with_subcolumns=*/ true); - - auto size_predictor = !predict_block_size_bytes ? nullptr - : IMergeTreeSelectAlgorithm::getSizePredictor(part.data_part, task_columns, sample_block); - - per_part.size_predictor = std::move(size_predictor); - - /// will be used to distinguish between PREWHERE and WHERE columns when applying filter - const auto & required_column_names = task_columns.columns.getNames(); - per_part.column_name_set = {required_column_names.begin(), required_column_names.end()}; - per_part.task_columns = std::move(task_columns); - } - - return per_part_sum_marks; -} - -MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread) +MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t task_idx, MergeTreeReadTask * previous_task) { const std::lock_guard lock{mutex}; /// If number of threads was lowered due to backoff, then will assign work only for maximum 'backoff_state.current_threads' threads. - if (thread >= backoff_state.current_threads) + if (task_idx >= backoff_state.current_threads) return nullptr; if (remaining_thread_tasks.empty()) return nullptr; - const auto tasks_remaining_for_this_thread = !threads_tasks[thread].sum_marks_in_parts.empty(); - if (!tasks_remaining_for_this_thread && do_not_steal_tasks) + const auto tasks_remaining_for_this_thread = !threads_tasks[task_idx].sum_marks_in_parts.empty(); + if (!tasks_remaining_for_this_thread && pool_settings.do_not_steal_tasks) return nullptr; /// Steal task if nothing to do and it's not prohibited - auto thread_idx = thread; + auto thread_idx = task_idx; if (!tasks_remaining_for_this_thread) { auto it = remaining_thread_tasks.lower_bound(backoff_state.current_threads); // Grab the entire tasks of a thread which is killed by backoff if (it != remaining_thread_tasks.end()) { - threads_tasks[thread] = std::move(threads_tasks[*it]); + threads_tasks[task_idx] = std::move(threads_tasks[*it]); remaining_thread_tasks.erase(it); - remaining_thread_tasks.insert(thread); + remaining_thread_tasks.insert(task_idx); } else // Try steal tasks from the next thread { - it = remaining_thread_tasks.upper_bound(thread); + it = remaining_thread_tasks.upper_bound(task_idx); if (it == remaining_thread_tasks.end()) it = remaining_thread_tasks.begin(); thread_idx = *it; } } + auto & thread_tasks = threads_tasks[thread_idx]; - auto & thread_task = thread_tasks.parts_and_ranges.back(); - const auto part_idx = thread_task.part_idx; - auto & part = per_part_params[part_idx].data_part; + const auto part_idx = thread_task.part_idx; auto & marks_in_part = thread_tasks.sum_marks_in_parts.back(); size_t need_marks; - if (is_part_on_remote_disk[part_idx] && !merge_tree_use_const_size_tasks_for_remote_reading) + if (is_part_on_remote_disk[part_idx] && !pool_settings.use_const_size_tasks_for_remote_reading) need_marks = marks_in_part; else /// Get whole part to read if it is small enough. 
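The stealing branch of `MergeTreeReadPool::getTask` above picks a victim from the `remaining_thread_tasks` set: first a thread dropped by backoff (id >= `current_threads`), otherwise the next thread id after the caller, wrapping around. A minimal sketch of that victim selection with simplified types:

```cpp
#include <cstddef>
#include <optional>
#include <set>

/// Prefer threads removed by backoff (ids >= current_threads); otherwise take the
/// id circularly following `self`.
std::optional<size_t> pickVictim(const std::set<size_t> & threads_with_tasks, size_t self, size_t current_threads)
{
    if (threads_with_tasks.empty())
        return std::nullopt;

    if (auto it = threads_with_tasks.lower_bound(current_threads); it != threads_with_tasks.end())
        return *it;

    auto it = threads_with_tasks.upper_bound(self);
    if (it == threads_with_tasks.end())
        it = threads_with_tasks.begin();
    return *it;
}
```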
need_marks = std::min(marks_in_part, min_marks_for_concurrent_read); @@ -237,28 +173,12 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread) } } - const auto & per_part = per_part_params[part_idx]; - auto curr_task_size_predictor = !per_part.size_predictor ? nullptr - : std::make_unique(*per_part.size_predictor); /// make a copy - - return std::make_unique( - part.data_part, - part.alter_conversions, - ranges_to_get_from_part, - part.part_index_in_query, - per_part.column_name_set, - per_part.task_columns, - std::move(curr_task_size_predictor)); -} - -Block MergeTreeReadPool::getHeader() const -{ - return storage_snapshot->getSampleBlockForColumns(column_names); + return createTask(per_part_infos[part_idx], std::move(ranges_to_get_from_part), previous_task); } void MergeTreeReadPool::profileFeedback(ReadBufferFromFileBase::ProfileInfo info) { - if (backoff_settings.min_read_latency_ms == 0 || do_not_steal_tasks) + if (backoff_settings.min_read_latency_ms == 0 || pool_settings.do_not_steal_tasks) return; if (info.nanoseconds < backoff_settings.min_read_latency_ms * 1000000) @@ -295,13 +215,10 @@ void MergeTreeReadPool::profileFeedback(ReadBufferFromFileBase::ProfileInfo info LOG_DEBUG(log, "Will lower number of threads to {}", backoff_state.current_threads); } - -void MergeTreeReadPool::fillPerThreadInfo( - size_t threads, size_t sum_marks, std::vector per_part_sum_marks, - const RangesInDataParts & parts) +void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) { threads_tasks.resize(threads); - if (parts.empty()) + if (parts_ranges.empty()) return; struct PartInfo @@ -314,17 +231,19 @@ void MergeTreeReadPool::fillPerThreadInfo( using PartsInfo = std::vector; std::queue parts_queue; + auto per_part_sum_marks = getPerPartSumMarks(); + { /// Group parts by disk name. /// We try minimize the number of threads concurrently read from the same disk. /// It improves the performance for JBOD architecture. std::map> parts_per_disk; - for (size_t i = 0; i < parts.size(); ++i) + for (size_t i = 0; i < parts_ranges.size(); ++i) { - PartInfo part_info{parts[i], per_part_sum_marks[i], i}; - if (parts[i].data_part->isStoredOnDisk()) - parts_per_disk[parts[i].data_part->getDataPartStorage().getDiskName()].push_back(std::move(part_info)); + PartInfo part_info{parts_ranges[i], per_part_sum_marks[i], i}; + if (parts_ranges[i].data_part->isStoredOnDisk()) + parts_per_disk[parts_ranges[i].data_part->getDataPartStorage().getDiskName()].push_back(std::move(part_info)); else parts_per_disk[""].push_back(std::move(part_info)); } @@ -344,7 +263,7 @@ void MergeTreeReadPool::fillPerThreadInfo( while (need_marks > 0 && !parts_queue.empty()) { auto & current_parts = parts_queue.front(); - RangesInDataPart & part = current_parts.back().part; + auto & part_with_ranges = current_parts.back().part; size_t & marks_in_part = current_parts.back().sum_marks; const auto part_idx = current_parts.back().part_idx; @@ -364,7 +283,7 @@ void MergeTreeReadPool::fillPerThreadInfo( /// Get whole part to read if it is small enough. if (marks_in_part <= need_marks) { - ranges_to_get_from_part = part.ranges; + ranges_to_get_from_part = part_with_ranges.ranges; marks_in_ranges = marks_in_part; need_marks -= marks_in_part; @@ -377,10 +296,10 @@ void MergeTreeReadPool::fillPerThreadInfo( /// Loop through part ranges. 
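The range loop that follows consumes mark ranges from the front: it takes at most the needed number of marks from the first range, advances its `begin`, and pops the range once it is exhausted. A compact standalone sketch of the same splitting with a simplified `MarkRange`:

```cpp
#include <algorithm>
#include <cstddef>
#include <deque>
#include <vector>

struct MarkRange { size_t begin = 0; size_t end = 0; };

/// Move up to `need_marks` marks from the front of `ranges` into `taken`.
void takeMarks(std::deque<MarkRange> & ranges, std::vector<MarkRange> & taken, size_t need_marks)
{
    while (need_marks > 0 && !ranges.empty())
    {
        MarkRange & range = ranges.front();
        const size_t marks_in_range = range.end - range.begin;
        const size_t to_take = std::min(marks_in_range, need_marks);

        taken.push_back({range.begin, range.begin + to_take});
        range.begin += to_take;
        need_marks -= to_take;

        if (range.begin == range.end)
            ranges.pop_front();
    }
}
```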
while (need_marks > 0) { - if (part.ranges.empty()) + if (part_with_ranges.ranges.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among threads"); - MarkRange & range = part.ranges.front(); + MarkRange & range = part_with_ranges.ranges.front(); const size_t marks_in_range = range.end - range.begin; const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); @@ -390,11 +309,11 @@ void MergeTreeReadPool::fillPerThreadInfo( marks_in_part -= marks_to_get_from_range; need_marks -= marks_to_get_from_range; if (range.begin == range.end) - part.ranges.pop_front(); + part_with_ranges.ranges.pop_front(); } } - threads_tasks[i].parts_and_ranges.push_back({ part_idx, ranges_to_get_from_part }); + threads_tasks[i].parts_and_ranges.push_back({part_idx, ranges_to_get_from_part}); threads_tasks[i].sum_marks_in_parts.push_back(marks_in_ranges); if (marks_in_ranges != 0) remaining_thread_tasks.insert(i); @@ -413,152 +332,4 @@ void MergeTreeReadPool::fillPerThreadInfo( } } - -MergeTreeReadPoolParallelReplicas::~MergeTreeReadPoolParallelReplicas() = default; - - -Block MergeTreeReadPoolParallelReplicas::getHeader() const -{ - return storage_snapshot->getSampleBlockForColumns(extension.columns_to_read); -} - -MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t thread) -{ - /// This parameter is needed only to satisfy the interface - UNUSED(thread); - - std::lock_guard lock(mutex); - - if (no_more_tasks_available) - return nullptr; - - if (buffered_ranges.empty()) - { - auto result = extension.callback(ParallelReadRequest( - CoordinationMode::Default, - extension.number_of_current_replica, - min_marks_for_concurrent_read * threads, - /// For Default coordination mode we don't need to pass part names. - RangesInDataPartsDescription{})); - - if (!result || result->finish) - { - no_more_tasks_available = true; - return nullptr; - } - - buffered_ranges = std::move(result->description); - } - - if (buffered_ranges.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No tasks to read. This is a bug"); - - auto & current_task = buffered_ranges.front(); - - RangesInDataPart part; - size_t part_idx = 0; - for (size_t index = 0; index < per_part_params.size(); ++index) - { - auto & other_part = per_part_params[index]; - if (other_part.data_part.data_part->info == current_task.info) - { - part = other_part.data_part; - part_idx = index; - break; - } - } - - MarkRanges ranges_to_read; - size_t current_sum_marks = 0; - while (current_sum_marks < min_marks_for_concurrent_read && !current_task.ranges.empty()) - { - auto diff = min_marks_for_concurrent_read - current_sum_marks; - auto range = current_task.ranges.front(); - if (range.getNumberOfMarks() > diff) - { - auto new_range = range; - new_range.end = range.begin + diff; - range.begin += diff; - - current_task.ranges.front() = range; - ranges_to_read.push_back(new_range); - current_sum_marks += new_range.getNumberOfMarks(); - continue; - } - - ranges_to_read.push_back(range); - current_sum_marks += range.getNumberOfMarks(); - current_task.ranges.pop_front(); - } - - if (current_task.ranges.empty()) - buffered_ranges.pop_front(); - - const auto & per_part = per_part_params[part_idx]; - - auto curr_task_size_predictor - = !per_part.size_predictor ? 
nullptr : std::make_unique(*per_part.size_predictor); /// make a copy - - return std::make_unique( - part.data_part, - part.alter_conversions, - ranges_to_read, - part.part_index_in_query, - per_part.column_name_set, - per_part.task_columns, - std::move(curr_task_size_predictor)); -} - - -MarkRanges MergeTreeInOrderReadPoolParallelReplicas::getNewTask(RangesInDataPartDescription description) -{ - std::lock_guard lock(mutex); - - auto get_from_buffer = [&]() -> std::optional - { - for (auto & desc : buffered_tasks) - { - if (desc.info == description.info && !desc.ranges.empty()) - { - auto result = std::move(desc.ranges); - desc.ranges = MarkRanges{}; - return result; - } - } - return std::nullopt; - }; - - if (auto result = get_from_buffer(); result) - return result.value(); - - if (no_more_tasks) - return {}; - - auto response = extension.callback(ParallelReadRequest( - mode, - extension.number_of_current_replica, - min_marks_for_concurrent_read * request.size(), - request - )); - - if (!response || response->description.empty() || response->finish) - { - no_more_tasks = true; - return {}; - } - - /// Fill the buffer - for (size_t i = 0; i < request.size(); ++i) - { - auto & new_ranges = response->description[i].ranges; - auto & old_ranges = buffered_tasks[i].ranges; - std::move(new_ranges.begin(), new_ranges.end(), std::back_inserter(old_ranges)); - } - - if (auto result = get_from_buffer(); result) - return result.value(); - - return {}; -} - } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 68d5438cb3d..3a1af947cae 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -1,38 +1,30 @@ #pragma once - +#include #include -#include -#include #include #include #include -#include #include #include -#include - #include namespace DB { -/** Provides read tasks for MergeTreeThreadSelectProcessor`s in fine-grained batches, allowing for more +/** Provides read tasks for MergeTreeThreadSelectAlgorithm in fine-grained batches, allowing for more * uniform distribution of work amongst multiple threads. All parts and their ranges are divided into `threads` * workloads with at most `sum_marks / threads` marks. Then, threads are performing reads from these workloads * in "sequential" manner, requesting work in small batches. As soon as some thread has exhausted * it's workload, it either is signaled that no more work is available (`do_not_steal_tasks == false`) or * continues taking small batches from other threads' workloads (`do_not_steal_tasks == true`). 
*/ -class MergeTreeReadPool : public IMergeTreeReadPool +class MergeTreeReadPool : public MergeTreeReadPoolBase { public: struct BackoffSettings; MergeTreeReadPool( - size_t threads_, - size_t sum_marks_, - size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -40,12 +32,14 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const Names & virtual_column_names_, - ContextPtr context_, - bool do_not_steal_tasks_ = false); + const PoolSettings & settings_, + const ContextPtr & context_); ~MergeTreeReadPool() override = default; - MergeTreeReadTaskPtr getTask(size_t thread) override; + String getName() const override { return "ReadPool"; } + bool preservesOrderOfRanges() const override { return false; } + MergeTreeReadTaskPtr getTask(size_t task_idx, MergeTreeReadTask * previous_task) override; /** Each worker could call this method and pass information about read performance. * If read performance is too low, pool could decide to lower number of threads: do not assign more tasks to several threads. @@ -53,8 +47,6 @@ public: */ void profileFeedback(ReadBufferFromFileBase::ProfileInfo info) override; - Block getHeader() const override; - /** Pull could dynamically lower (backoff) number of threads, if read operation are too slow. * Settings for that backoff. */ @@ -82,50 +74,12 @@ public: BackoffSettings() : min_read_latency_ms(0) {} }; - struct PerPartParams - { - MergeTreeReadTaskColumns task_columns; - NameSet column_name_set; - MergeTreeBlockSizePredictorPtr size_predictor; - RangesInDataPart data_part; - }; - - static std::vector fillPerPartInfo( - const RangesInDataParts & parts, - const StorageSnapshotPtr & storage_snapshot, - std::vector & is_part_on_remote_disk, - bool & predict_block_size_bytes, - const Names & column_names, - const Names & virtual_column_names, - const PrewhereInfoPtr & prewhere_info, - const ExpressionActionsSettings & actions_settings_, - const MergeTreeReaderSettings & reader_settings_, - std::vector & per_part_params); - private: - void fillPerThreadInfo( - size_t threads, size_t sum_marks, std::vector per_part_sum_marks, - const RangesInDataParts & parts); - - /// Initialized in constructor - StorageSnapshotPtr storage_snapshot; - const Names column_names; - const Names virtual_column_names; - size_t min_marks_for_concurrent_read{0}; - PrewhereInfoPtr prewhere_info; - ExpressionActionsSettings actions_settings; - MergeTreeReaderSettings reader_settings; - RangesInDataParts parts_ranges; - bool predict_block_size_bytes; - bool do_not_steal_tasks; - bool merge_tree_use_const_size_tasks_for_remote_reading = false; - - std::vector per_part_params; - std::vector is_part_on_remote_disk; - - BackoffSettings backoff_settings; + void fillPerThreadInfo(size_t threads, size_t sum_marks); mutable std::mutex mutex; + size_t min_marks_for_concurrent_read = 0; + /// State to track numbers of slow reads. 
struct BackoffState { @@ -135,16 +89,10 @@ private: explicit BackoffState(size_t threads) : current_threads(threads) {} }; + + const BackoffSettings backoff_settings; BackoffState backoff_state; - struct Part - { - MergeTreeData::DataPartPtr data_part; - size_t part_index_in_query; - }; - - std::vector parts_with_idx; - struct ThreadTask { struct PartIndexAndRange @@ -159,123 +107,8 @@ private: std::vector threads_tasks; std::set remaining_thread_tasks; + Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPool"); - }; -class MergeTreeReadPoolParallelReplicas : public IMergeTreeReadPool -{ -public: - MergeTreeReadPoolParallelReplicas( - StorageSnapshotPtr storage_snapshot_, - size_t threads_, - ParallelReadingExtension extension_, - const RangesInDataParts & parts_, - const PrewhereInfoPtr & prewhere_info_, - const ExpressionActionsSettings & actions_settings_, - const MergeTreeReaderSettings & reader_settings_, - const Names & column_names_, - const Names & virtual_column_names_, - size_t min_marks_for_concurrent_read_) - : extension(extension_) - , threads(threads_) - , prewhere_info(prewhere_info_) - , actions_settings(actions_settings_) - , reader_settings(reader_settings_) - , storage_snapshot(storage_snapshot_) - , min_marks_for_concurrent_read(min_marks_for_concurrent_read_) - , column_names(column_names_) - , virtual_column_names(virtual_column_names_) - , parts_ranges(std::move(parts_)) - { - MergeTreeReadPool::fillPerPartInfo( - parts_ranges, storage_snapshot, is_part_on_remote_disk, - predict_block_size_bytes, column_names, virtual_column_names, prewhere_info, - actions_settings, reader_settings, per_part_params); - - extension.all_callback(InitialAllRangesAnnouncement( - CoordinationMode::Default, - parts_ranges.getDescriptions(), - extension.number_of_current_replica - )); - } - - ~MergeTreeReadPoolParallelReplicas() override; - - Block getHeader() const override; - - MergeTreeReadTaskPtr getTask(size_t thread) override; - - void profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {} - -private: - ParallelReadingExtension extension; - - RangesInDataPartsDescription buffered_ranges; - size_t threads; - bool no_more_tasks_available{false}; - Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPoolParallelReplicas"); - - std::mutex mutex; - - PrewhereInfoPtr prewhere_info; - ExpressionActionsSettings actions_settings; - MergeTreeReaderSettings reader_settings; - StorageSnapshotPtr storage_snapshot; - size_t min_marks_for_concurrent_read; - const Names column_names; - const Names virtual_column_names; - RangesInDataParts parts_ranges; - - bool predict_block_size_bytes = false; - std::vector is_part_on_remote_disk; - std::vector per_part_params; -}; - -using MergeTreeReadPoolParallelReplicasPtr = std::shared_ptr; - - -class MergeTreeInOrderReadPoolParallelReplicas : private boost::noncopyable -{ -public: - MergeTreeInOrderReadPoolParallelReplicas( - RangesInDataParts parts_, - ParallelReadingExtension extension_, - CoordinationMode mode_, - size_t min_marks_for_concurrent_read_) - : parts_ranges(parts_) - , extension(extension_) - , mode(mode_) - , min_marks_for_concurrent_read(min_marks_for_concurrent_read_) - { - for (const auto & part : parts_ranges) - request.push_back({part.data_part->info, MarkRanges{}}); - - for (const auto & part : parts_ranges) - buffered_tasks.push_back({part.data_part->info, MarkRanges{}}); - - extension.all_callback(InitialAllRangesAnnouncement( - mode, - parts_ranges.getDescriptions(), - extension.number_of_current_replica - )); - } - - 
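A compact illustration of the backoff idea behind `BackoffSettings` and `BackoffState`: after a few consecutive reads slower than a latency threshold, the pool stops assigning tasks to one more thread. The thresholds and the throughput check of the real `profileFeedback` are simplified away; the field names below are assumptions.

#include <cstddef>
#include <cstdint>

struct SimpleBackoff
{
    uint64_t min_read_latency_ns = 1000000; /// assumed threshold (1 ms)
    size_t min_events = 2;                  /// consecutive slow reads needed before reacting
    size_t min_concurrency = 1;

    size_t current_threads;
    size_t num_slow_events = 0;

    explicit SimpleBackoff(size_t threads) : current_threads(threads) {}

    /// Returns true if the number of active threads was lowered.
    bool onReadFinished(uint64_t read_latency_ns)
    {
        if (read_latency_ns < min_read_latency_ns)
        {
            num_slow_events = 0; /// only consecutive slow reads count here
            return false;
        }

        if (++num_slow_events < min_events || current_threads <= min_concurrency)
            return false;

        --current_threads;
        num_slow_events = 0;
        return true;
    }
};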
MarkRanges getNewTask(RangesInDataPartDescription description); - - - RangesInDataParts parts_ranges; - ParallelReadingExtension extension; - CoordinationMode mode; - size_t min_marks_for_concurrent_read{0}; - - bool no_more_tasks{false}; - RangesInDataPartsDescription request; - RangesInDataPartsDescription buffered_tasks; - - std::mutex mutex; -}; - -using MergeTreeInOrderReadPoolParallelReplicasPtr = std::shared_ptr; - } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp new file mode 100644 index 00000000000..446baccd961 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -0,0 +1,149 @@ +#include +#include +#include + +namespace DB +{ + +MergeTreeReadPoolBase::MergeTreeReadPoolBase( + RangesInDataParts && parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & pool_settings_, + const ContextPtr & context_) + : parts_ranges(std::move(parts_)) + , storage_snapshot(storage_snapshot_) + , prewhere_info(prewhere_info_) + , actions_settings(actions_settings_) + , reader_settings(reader_settings_) + , column_names(column_names_) + , virtual_column_names(virtual_column_names_) + , pool_settings(pool_settings_) + , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) + , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr) + , header(storage_snapshot->getSampleBlockForColumns(column_names)) + , profile_callback([this](ReadBufferFromFileBase::ProfileInfo info_) { profileFeedback(info_); }) +{ + fillPerPartInfos(); +} + +void MergeTreeReadPoolBase::fillPerPartInfos() +{ + per_part_infos.reserve(parts_ranges.size()); + is_part_on_remote_disk.reserve(parts_ranges.size()); + + auto sample_block = storage_snapshot->metadata->getSampleBlock(); + + for (const auto & part_with_ranges : parts_ranges) + { +#ifndef NDEBUG + assertSortedAndNonIntersecting(part_with_ranges.ranges); +#endif + + MergeTreeReadTask::Info read_task_info; + + read_task_info.data_part = part_with_ranges.data_part; + read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; + read_task_info.alter_conversions = part_with_ranges.alter_conversions; + + LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions); + + read_task_info.task_columns = getReadTaskColumns( + part_info, storage_snapshot, column_names, virtual_column_names, + prewhere_info, actions_settings, + reader_settings, /*with_subcolumns=*/ true); + + if (pool_settings.preferred_block_size_bytes > 0) + { + const auto & result_column_names = read_task_info.task_columns.columns.getNames(); + NameSet all_column_names(result_column_names.begin(), result_column_names.end()); + + for (const auto & pre_columns_per_step : read_task_info.task_columns.pre_columns) + { + const auto & pre_column_names = pre_columns_per_step.getNames(); + all_column_names.insert(pre_column_names.begin(), pre_column_names.end()); + } + + read_task_info.shared_size_predictor = std::make_unique( + read_task_info.data_part, + Names(all_column_names.begin(), all_column_names.end()), + sample_block); + } + + is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk()); + 
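The shared size predictor built above has to see every column a task may materialize. A stripped-down sketch of that collection step — the result columns plus the columns of every PREWHERE step, de-duplicated — using plain standard containers in place of `Names`/`NameSet`:

#include <set>
#include <string>
#include <vector>

std::vector<std::string> collectPredictorColumns(
    const std::vector<std::string> & result_columns,
    const std::vector<std::vector<std::string>> & pre_columns_per_step)
{
    std::set<std::string> all_columns(result_columns.begin(), result_columns.end());
    for (const auto & step_columns : pre_columns_per_step)
        all_columns.insert(step_columns.begin(), step_columns.end());

    return {all_columns.begin(), all_columns.end()};
}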
per_part_infos.push_back(std::make_shared(std::move(read_task_info))); + } +} + +std::vector MergeTreeReadPoolBase::getPerPartSumMarks() const +{ + std::vector per_part_sum_marks; + per_part_sum_marks.reserve(parts_ranges.size()); + + for (const auto & part_with_ranges : parts_ranges) + { + size_t sum_marks = 0; + for (const auto & range : part_with_ranges.ranges) + sum_marks += range.end - range.begin; + + per_part_sum_marks.push_back(sum_marks); + } + + return per_part_sum_marks; +} + +MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( + MergeTreeReadTask::InfoPtr read_info, + MarkRanges ranges, + MergeTreeReadTask * previous_task) const +{ + auto task_size_predictor = read_info->shared_size_predictor + ? std::make_unique(*read_info->shared_size_predictor) + : nullptr; /// make a copy + + auto get_part_name = [](const auto & task_info) -> const String & + { + return task_info.data_part->isProjectionPart() ? task_info.data_part->getParentPart()->name : task_info.data_part->name; + }; + + auto extras = getExtras(); + MergeTreeReadTask::Readers task_readers; + + if (!previous_task) + { + task_readers = MergeTreeReadTask::createReaders(read_info, extras, ranges); + } + else if (get_part_name(previous_task->getInfo()) != get_part_name(*read_info)) + { + extras.value_size_map = previous_task->getMainReader().getAvgValueSizeHints(); + task_readers = MergeTreeReadTask::createReaders(read_info, extras, ranges); + } + else + { + task_readers = previous_task->releaseReaders(); + } + + return std::make_unique( + read_info, + std::move(task_readers), + std::move(ranges), + std::move(task_size_predictor)); +} + +MergeTreeReadTask::Extras MergeTreeReadPoolBase::getExtras() const +{ + return + { + .uncompressed_cache = owned_uncompressed_cache.get(), + .mark_cache = owned_mark_cache.get(), + .reader_settings = reader_settings, + .storage_snapshot = storage_snapshot, + .profile_callback = profile_callback, + }; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h new file mode 100644 index 00000000000..0081063cd37 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -0,0 +1,67 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class MergeTreeReadPoolBase : public IMergeTreeReadPool +{ +public: + struct PoolSettings + { + size_t threads = 0; + size_t sum_marks = 0; + size_t min_marks_for_concurrent_read = 0; + size_t preferred_block_size_bytes = 0; + + bool use_uncompressed_cache = false; + bool do_not_steal_tasks = false; + bool use_const_size_tasks_for_remote_reading = false; + }; + + MergeTreeReadPoolBase( + RangesInDataParts && parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_); + + Block getHeader() const override { return header; } + +protected: + /// Initialized in constructor + const RangesInDataParts parts_ranges; + const StorageSnapshotPtr storage_snapshot; + const PrewhereInfoPtr prewhere_info; + const ExpressionActionsSettings actions_settings; + const MergeTreeReaderSettings reader_settings; + const Names column_names; + const Names virtual_column_names; + const PoolSettings pool_settings; + const MarkCachePtr owned_mark_cache; + const UncompressedCachePtr owned_uncompressed_cache; + const Block header; + + 
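`createTask` above reuses readers where possible because constructing them is comparatively expensive. Its decision can be summarized by this small standalone helper (an illustrative enum, not part of the codebase): with no previous task the readers are built from scratch; a task for a different part builds new readers but inherits the average value size hints; a task for the same part simply takes over the previous task's readers.

#include <optional>
#include <string>

enum class ReaderOrigin
{
    CreateFresh,          /// first task of the stream: build readers from scratch
    CreateWithSizeHints,  /// new part: build readers, reuse avg value size hints
    ReuseFromPrevious,    /// same part: take the readers of the previous task
};

ReaderOrigin chooseReaderOrigin(
    const std::optional<std::string> & previous_part_name,
    const std::string & current_part_name)
{
    if (!previous_part_name)
        return ReaderOrigin::CreateFresh;
    if (*previous_part_name != current_part_name)
        return ReaderOrigin::CreateWithSizeHints;
    return ReaderOrigin::ReuseFromPrevious;
}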
void fillPerPartInfos(); + std::vector getPerPartSumMarks() const; + + MergeTreeReadTaskPtr createTask( + MergeTreeReadTask::InfoPtr read_info, + MarkRanges ranges, + MergeTreeReadTask * previous_task) const; + + MergeTreeReadTask::Extras getExtras() const; + + std::vector per_part_infos; + std::vector is_part_on_remote_disk; + + ReadBufferFromFileBase::ProfileCallback profile_callback; +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp new file mode 100644 index 00000000000..1b621ad5055 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -0,0 +1,73 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( + bool has_limit_below_one_block_, + MergeTreeReadType read_type_, + RangesInDataParts parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_) + : MergeTreeReadPoolBase( + std::move(parts_), + storage_snapshot_, + prewhere_info_, + actions_settings_, + reader_settings_, + column_names_, + virtual_column_names_, + settings_, + context_) + , has_limit_below_one_block(has_limit_below_one_block_) + , read_type(read_type_) +{ + per_part_mark_ranges.reserve(parts_ranges.size()); + for (const auto & part_with_ranges : parts_ranges) + per_part_mark_ranges.push_back(part_with_ranges.ranges); +} + +MergeTreeReadTaskPtr MergeTreeReadPoolInOrder::getTask(size_t task_idx, MergeTreeReadTask * previous_task) +{ + if (task_idx >= per_part_infos.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Requested task with idx {}, but there are only {} parts", + task_idx, per_part_infos.size()); + + auto & all_mark_ranges = per_part_mark_ranges[task_idx]; + if (all_mark_ranges.empty()) + return nullptr; + + MarkRanges mark_ranges_for_task; + if (read_type == MergeTreeReadType::InReverseOrder) + { + /// Read ranges from right to left. + mark_ranges_for_task.emplace_back(std::move(all_mark_ranges.back())); + all_mark_ranges.pop_back(); + } + else if (has_limit_below_one_block) + { + /// If we need to read few rows, set one range per task to reduce number of read data. 
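A standalone sketch of the range-selection policy of the in-order pool, with `std::deque<int>` standing in for `MarkRanges`: reverse reads hand out one range from the back, a query with a small LIMIT gets one range from the front per task, and otherwise all remaining ranges go into a single task.

#include <deque>

enum class OrderMode { Forward, Reverse };

std::deque<int> takeRangesForTask(std::deque<int> & all_ranges, OrderMode mode, bool has_limit_below_one_block)
{
    std::deque<int> task_ranges;
    if (all_ranges.empty())
        return task_ranges;

    if (mode == OrderMode::Reverse)
    {
        task_ranges.push_back(all_ranges.back()); /// read ranges from right to left
        all_ranges.pop_back();
    }
    else if (has_limit_below_one_block)
    {
        task_ranges.push_back(all_ranges.front()); /// one range per task for tiny reads
        all_ranges.pop_front();
    }
    else
    {
        task_ranges.swap(all_ranges); /// everything that is left goes into one task
    }
    return task_ranges;
}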
+ mark_ranges_for_task.emplace_back(std::move(all_mark_ranges.front())); + all_mark_ranges.pop_front(); + } + else + { + mark_ranges_for_task = std::move(all_mark_ranges); + } + + return createTask(per_part_infos[task_idx], std::move(mark_ranges_for_task), previous_task); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h new file mode 100644 index 00000000000..d9cc1ba4984 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -0,0 +1,35 @@ +#pragma once +#include + +namespace DB +{ + +class MergeTreeReadPoolInOrder : public MergeTreeReadPoolBase +{ +public: + MergeTreeReadPoolInOrder( + bool has_limit_below_one_block_, + MergeTreeReadType read_type_, + RangesInDataParts parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_); + + String getName() const override { return "ReadPoolInOrder"; } + bool preservesOrderOfRanges() const override { return true; } + MergeTreeReadTaskPtr getTask(size_t task_idx, MergeTreeReadTask * previous_task) override; + void profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {} + +private: + const bool has_limit_below_one_block; + const MergeTreeReadType read_type; + + std::vector per_part_mark_ranges; +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp new file mode 100644 index 00000000000..e61ddf0d122 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -0,0 +1,110 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( + ParallelReadingExtension extension_, + RangesInDataParts && parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_) + : MergeTreeReadPoolBase( + std::move(parts_), + storage_snapshot_, + prewhere_info_, + actions_settings_, + reader_settings_, + column_names_, + virtual_column_names_, + settings_, + context_) + , extension(std::move(extension_)) +{ + extension.all_callback(InitialAllRangesAnnouncement( + CoordinationMode::Default, + parts_ranges.getDescriptions(), + extension.number_of_current_replica + )); +} + +MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_idx*/, MergeTreeReadTask * previous_task) +{ + std::lock_guard lock(mutex); + + if (no_more_tasks_available) + return nullptr; + + if (buffered_ranges.empty()) + { + auto result = extension.callback(ParallelReadRequest( + CoordinationMode::Default, + extension.number_of_current_replica, + pool_settings.min_marks_for_concurrent_read * pool_settings.threads, + /// For Default coordination mode we don't need to pass part names. 
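The splitting loop used below by the parallel-replicas pool can be illustrated in isolation: ranges received from the coordinator are consumed until roughly `min_marks_for_concurrent_read` marks are collected, and an oversized front range is split so the task stays within its budget. `Range` here is a stand-in for `MarkRange`.

#include <cstddef>
#include <deque>
#include <vector>

struct Range { size_t begin = 0; size_t end = 0; size_t size() const { return end - begin; } };

std::vector<Range> takeMarksForTask(std::deque<Range> & pending, size_t min_marks)
{
    std::vector<Range> result;
    size_t collected = 0;

    while (collected < min_marks && !pending.empty())
    {
        Range & front = pending.front();
        const size_t budget = min_marks - collected;

        if (front.size() > budget)
        {
            result.push_back({front.begin, front.begin + budget}); /// take only a prefix
            front.begin += budget;                                 /// keep the tail pending
            collected += budget;
            break;
        }

        result.push_back(front);
        collected += front.size();
        pending.pop_front();
    }
    return result;
}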
+ RangesInDataPartsDescription{})); + + if (!result || result->finish) + { + no_more_tasks_available = true; + return nullptr; + } + + buffered_ranges = std::move(result->description); + } + + if (buffered_ranges.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No tasks to read. This is a bug"); + + auto & current_task = buffered_ranges.front(); + + size_t part_idx = 0; + for (size_t index = 0; index < per_part_infos.size(); ++index) + { + if (per_part_infos[index]->data_part->info == current_task.info) + { + part_idx = index; + break; + } + } + + MarkRanges ranges_to_read; + size_t current_sum_marks = 0; + while (current_sum_marks < pool_settings.min_marks_for_concurrent_read && !current_task.ranges.empty()) + { + auto diff = pool_settings.min_marks_for_concurrent_read - current_sum_marks; + auto range = current_task.ranges.front(); + if (range.getNumberOfMarks() > diff) + { + auto new_range = range; + new_range.end = range.begin + diff; + range.begin += diff; + + current_task.ranges.front() = range; + ranges_to_read.push_back(new_range); + current_sum_marks += new_range.getNumberOfMarks(); + continue; + } + + ranges_to_read.push_back(range); + current_sum_marks += range.getNumberOfMarks(); + current_task.ranges.pop_front(); + } + + if (current_task.ranges.empty()) + buffered_ranges.pop_front(); + + return createTask(per_part_infos[part_idx], std::move(ranges_to_read), previous_task); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h new file mode 100644 index 00000000000..08020565ec4 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include + +namespace DB +{ + +class MergeTreeReadPoolParallelReplicas : public MergeTreeReadPoolBase +{ +public: + MergeTreeReadPoolParallelReplicas( + ParallelReadingExtension extension_, + RangesInDataParts && parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_); + + ~MergeTreeReadPoolParallelReplicas() override = default; + + String getName() const override { return "ReadPoolParallelReplicas"; } + bool preservesOrderOfRanges() const override { return false; } + void profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {} + MergeTreeReadTaskPtr getTask(size_t task_idx, MergeTreeReadTask * previous_task) override; + +private: + mutable std::mutex mutex; + + const ParallelReadingExtension extension; + RangesInDataPartsDescription buffered_ranges; + bool no_more_tasks_available{false}; + Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPoolParallelReplicas"); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp new file mode 100644 index 00000000000..a822a517933 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -0,0 +1,106 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrder( + ParallelReadingExtension extension_, + CoordinationMode mode_, + RangesInDataParts parts_, + const StorageSnapshotPtr & storage_snapshot_, + const 
PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_) + : MergeTreeReadPoolBase( + std::move(parts_), + storage_snapshot_, + prewhere_info_, + actions_settings_, + reader_settings_, + column_names_, + virtual_column_names_, + settings_, + context_) + , extension(std::move(extension_)) + , mode(mode_) +{ + for (const auto & part : parts_ranges) + request.push_back({part.data_part->info, MarkRanges{}}); + + for (const auto & part : parts_ranges) + buffered_tasks.push_back({part.data_part->info, MarkRanges{}}); + + extension.all_callback(InitialAllRangesAnnouncement( + mode, + parts_ranges.getDescriptions(), + extension.number_of_current_replica + )); +} + +MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t task_idx, MergeTreeReadTask * previous_task) +{ + std::lock_guard lock(mutex); + + if (task_idx >= per_part_infos.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Requested task with idx {}, but there are only {} parts", + task_idx, per_part_infos.size()); + + const auto & part_info = per_part_infos[task_idx]->data_part->info; + auto get_from_buffer = [&]() -> std::optional + { + for (auto & desc : buffered_tasks) + { + if (desc.info == part_info && !desc.ranges.empty()) + { + auto result = std::move(desc.ranges); + desc.ranges = MarkRanges{}; + return result; + } + } + return std::nullopt; + }; + + if (auto result = get_from_buffer(); result) + return createTask(per_part_infos[task_idx], std::move(*result), previous_task); + + if (no_more_tasks) + return nullptr; + + auto response = extension.callback(ParallelReadRequest( + mode, + extension.number_of_current_replica, + pool_settings.min_marks_for_concurrent_read * request.size(), + request + )); + + if (!response || response->description.empty() || response->finish) + { + no_more_tasks = true; + return nullptr; + } + + /// Fill the buffer + for (size_t i = 0; i < request.size(); ++i) + { + auto & new_ranges = response->description[i].ranges; + auto & old_ranges = buffered_tasks[i].ranges; + std::move(new_ranges.begin(), new_ranges.end(), std::back_inserter(old_ranges)); + } + + if (auto result = get_from_buffer(); result) + return createTask(per_part_infos[task_idx], std::move(*result), previous_task); + + return nullptr; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h new file mode 100644 index 00000000000..3e5f8f5dfba --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -0,0 +1,40 @@ +#pragma once +#include +#include + +namespace DB +{ + +class MergeTreeReadPoolParallelReplicasInOrder : public MergeTreeReadPoolBase +{ +public: + MergeTreeReadPoolParallelReplicasInOrder( + ParallelReadingExtension extension_, + CoordinationMode mode_, + RangesInDataParts parts_, + const StorageSnapshotPtr & storage_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReaderSettings & reader_settings_, + const Names & column_names_, + const Names & virtual_column_names_, + const PoolSettings & settings_, + const ContextPtr & context_); + + String getName() const override { return "ReadPoolParallelReplicasInOrder"; } + bool preservesOrderOfRanges() const override { return true; } + void 
profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {} + MergeTreeReadTaskPtr getTask(size_t task_idx, MergeTreeReadTask * previous_task) override; + +private: + const ParallelReadingExtension extension; + const CoordinationMode mode; + + bool no_more_tasks{false}; + RangesInDataPartsDescription request; + RangesInDataPartsDescription buffered_tasks; + + mutable std::mutex mutex; +}; + +}; diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp new file mode 100644 index 00000000000..9593a1fb1f3 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -0,0 +1,193 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +String MergeTreeReadTask::Columns::dump() const +{ + WriteBufferFromOwnString s; + for (size_t i = 0; i < pre_columns.size(); ++i) + { + s << "STEP " << i << ": " << pre_columns[i].toString() << "\n"; + } + s << "COLUMNS: " << columns.toString() << "\n"; + return s.str(); +} + +MergeTreeReadTask::MergeTreeReadTask( + InfoPtr info_, + Readers readers_, + MarkRanges mark_ranges_, + MergeTreeBlockSizePredictorPtr size_predictor_) + : info(std::move(info_)) + , readers(std::move(readers_)) + , mark_ranges(std::move(mark_ranges_)) + , size_predictor(std::move(size_predictor_)) +{ +} + +MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( + const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) +{ + Readers new_readers; + + auto create_reader = [&](const NamesAndTypesList & columns_to_read) + { + return read_info->data_part->getReader( + columns_to_read, extras.storage_snapshot, ranges, + extras.uncompressed_cache, extras.mark_cache, + read_info->alter_conversions, extras.reader_settings, extras.value_size_map, extras.profile_callback); + }; + + new_readers.main = create_reader(read_info->task_columns.columns); + + /// Add lightweight delete filtering step + if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete()) + new_readers.prewhere.push_back(create_reader({LightweightDeleteDescription::FILTER_COLUMN})); + + for (const auto & pre_columns_per_step : read_info->task_columns.pre_columns) + new_readers.prewhere.push_back(create_reader(pre_columns_per_step)); + + return new_readers; +} + +MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( + const Readers & task_readers, + const PrewhereExprInfo & prewhere_actions, + const Names & non_const_virtual_column_names) +{ + MergeTreeReadTask::RangeReaders new_range_readers; + if (prewhere_actions.steps.size() != task_readers.prewhere.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "PREWHERE steps count mismatch, actions: {}, readers: {}", + prewhere_actions.steps.size(), task_readers.prewhere.size()); + + MergeTreeRangeReader * prev_reader = nullptr; + bool last_reader = false; + + for (size_t i = 0; i < prewhere_actions.steps.size(); ++i) + { + last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size()); + + MergeTreeRangeReader current_reader( + task_readers.prewhere[i].get(), + prev_reader, prewhere_actions.steps[i].get(), + last_reader, non_const_virtual_column_names); + + new_range_readers.prewhere.push_back(std::move(current_reader)); + prev_reader = &new_range_readers.prewhere.back(); + } + + if (!last_reader) + { + new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true, non_const_virtual_column_names); + } + else + { + /// If all 
columns are read by prewhere range readers than move last prewhere range reader to main. + new_range_readers.main = std::move(new_range_readers.prewhere.back()); + new_range_readers.prewhere.pop_back(); + } + + return new_range_readers; +} + +void MergeTreeReadTask::initializeRangeReaders( + const PrewhereExprInfo & prewhere_actions, + const Names & non_const_virtual_column_names) +{ + if (range_readers.main.isInitialized()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Range reader is already initialized"); + + range_readers = createRangeReaders(readers, prewhere_actions, non_const_virtual_column_names); +} + +UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const +{ + if (!size_predictor) + return static_cast(params.max_block_size_rows); + + /// Calculates number of rows will be read using preferred_block_size_bytes. + /// Can't be less than avg_index_granularity. + size_t rows_to_read = size_predictor->estimateNumRows(params.preferred_block_size_bytes); + if (!rows_to_read) + return rows_to_read; + + auto total_row_in_current_granule = range_readers.main.numRowsInCurrentGranule(); + rows_to_read = std::max(total_row_in_current_granule, rows_to_read); + + if (params.preferred_max_column_in_block_size_bytes) + { + /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. + auto rows_to_read_for_max_size_column = size_predictor->estimateNumRowsForMaxSizeColumn(params.preferred_max_column_in_block_size_bytes); + + double filtration_ratio = std::max(params.min_filtration_ratio, 1.0 - size_predictor->filtered_rows_ratio); + auto rows_to_read_for_max_size_column_with_filtration + = static_cast(rows_to_read_for_max_size_column / filtration_ratio); + + /// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity. + rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration); + } + + auto unread_rows_in_current_granule = range_readers.main.numPendingRowsInCurrentGranule(); + if (unread_rows_in_current_granule >= rows_to_read) + return rows_to_read; + + const auto & index_granularity = info->data_part->index_granularity; + return index_granularity.countMarksForRows(range_readers.main.currentMark(), rows_to_read, range_readers.main.numReadRowsInCurrentGranule(), params.min_marks_to_read); +} + +MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read(const BlockSizeParams & params) +{ + if (size_predictor) + size_predictor->startBlock(); + + UInt64 recommended_rows = estimateNumRows(params); + UInt64 rows_to_read = std::max(static_cast(1), std::min(params.max_block_size_rows, recommended_rows)); + + auto read_result = range_readers.main.read(rows_to_read, mark_ranges); + + /// All rows were filtered. Repeat. + if (read_result.num_rows == 0) + read_result.columns.clear(); + + const auto & sample_block = range_readers.main.getSampleBlock(); + if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent number of columns got from MergeTreeRangeReader. " + "Have {} in sample block and {} columns in list", + toString(sample_block.columns()), toString(read_result.columns.size())); + + /// TODO: check columns have the same types as in header. 
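The arithmetic in `estimateNumRows` above reduces to the sketch below (standalone, with assumed inputs in place of the range reader and index granularity): a byte budget is turned into a row count, which is never below the rows of the current granule and may be further capped by the per-column byte budget corrected for the PREWHERE filtration ratio. The final rounding to whole marks is omitted.

#include <algorithm>
#include <cstdint>

uint64_t estimateRowsToRead(
    uint64_t rows_for_block_bytes,      /// predictor estimate for preferred_block_size_bytes
    uint64_t rows_for_max_column_bytes, /// estimate for preferred_max_column_in_block_size_bytes (0 = unset)
    double filtered_rows_ratio,         /// fraction of rows dropped by PREWHERE so far
    uint64_t rows_in_current_granule,
    double min_filtration_ratio = 0.00001)
{
    uint64_t rows = std::max(rows_for_block_bytes, rows_in_current_granule);

    if (rows_for_max_column_bytes != 0)
    {
        const double filtration_ratio = std::max(min_filtration_ratio, 1.0 - filtered_rows_ratio);
        const auto capped = static_cast<uint64_t>(rows_for_max_column_bytes / filtration_ratio);
        rows = std::min(rows, capped);
    }
    return rows;
}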
+ UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows; + + size_t num_read_rows = read_result.numReadRows(); + size_t num_read_bytes = read_result.numBytesRead(); + + if (size_predictor) + { + size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows); + if (!read_result.columns.empty()) + size_predictor->update(sample_block, read_result.columns, read_result.num_rows); + } + + Block block; + if (read_result.num_rows != 0) + block = sample_block.cloneWithColumns(read_result.columns); + + BlockAndProgress res = { + .block = std::move(block), + .row_count = read_result.num_rows, + .num_read_rows = num_read_rows, + .num_read_bytes = num_read_bytes }; + + return res; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h new file mode 100644 index 00000000000..8d2f0657fd1 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -0,0 +1,156 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class UncompressedCache; +class MarkCache; + +struct MergeTreeBlockSizePredictor; +using MergeTreeBlockSizePredictorPtr = std::shared_ptr; + +class IMergeTreeDataPart; +using DataPartPtr = std::shared_ptr; +using MergeTreeReaderPtr = std::unique_ptr; + +enum class MergeTreeReadType +{ + /// By default, read will use MergeTreeReadPool and return pipe with num_streams outputs. + /// If num_streams == 1, will read without pool, in order specified in parts. + Default, + /// Read in sorting key order. + /// Returned pipe will have the number of ports equals to parts.size(). + /// Parameter num_streams_ is ignored in this case. + /// User should add MergingSorted itself if needed. + InOrder, + /// The same as InOrder, but in reverse order. + /// For every part, read ranges and granules from end to begin. Also add ReverseTransform. + InReverseOrder, + /// A special type of reading where every replica + /// talks to a remote coordinator (which is located on the initiator node) + /// and who spreads marks and parts across them. + ParallelReplicas, +}; + +/// A batch of work for MergeTreeSelectProcessor +struct MergeTreeReadTask : private boost::noncopyable +{ +public: + struct Columns + { + /// Column names to read during WHERE + NamesAndTypesList columns; + /// Column names to read during each PREWHERE step + std::vector pre_columns; + + String dump() const; + }; + + struct Info + { + /// Data part which should be read while performing this task + DataPartPtr data_part; + /// For virtual `part_index` virtual column + size_t part_index_in_query; + /// Alter converversionss that should be applied on-fly for part. + AlterConversionsPtr alter_conversions; + /// Column names to read during PREWHERE and WHERE + Columns task_columns; + /// Shared initialized size predictor. It is copied for each new task. + MergeTreeBlockSizePredictorPtr shared_size_predictor; + }; + + using InfoPtr = std::shared_ptr; + + /// Extra params that required for creation of reader. 
+ struct Extras + { + UncompressedCache * uncompressed_cache = nullptr; + MarkCache * mark_cache = nullptr; + MergeTreeReaderSettings reader_settings; + StorageSnapshotPtr storage_snapshot; + IMergeTreeReader::ValueSizeMap value_size_map; + ReadBufferFromFileBase::ProfileCallback profile_callback; + }; + + struct Readers + { + MergeTreeReaderPtr main; + std::vector prewhere; + }; + + struct RangeReaders + { + /// Used to save current range processing status + MergeTreeRangeReader main; + + /// Range readers for multiple filtering steps: row level security, PREWHERE etc. + /// NOTE: we take references to elements and push_back new elements, that's why it is a deque but not a vector + std::deque prewhere; + }; + + struct BlockSizeParams + { + UInt64 max_block_size_rows = DEFAULT_BLOCK_SIZE; + UInt64 preferred_block_size_bytes = 1000000; + UInt64 preferred_max_column_in_block_size_bytes = 0; + UInt64 min_marks_to_read = 0; + double min_filtration_ratio = 0.00001; + }; + + /// The result of reading from task. + struct BlockAndProgress + { + Block block; + size_t row_count = 0; + size_t num_read_rows = 0; + size_t num_read_bytes = 0; + }; + + MergeTreeReadTask(InfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_); + + void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + + BlockAndProgress read(const BlockSizeParams & params); + bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); } + + const Info & getInfo() const { return *info; } + const MergeTreeRangeReader & getMainRangeReader() const { return range_readers.main; } + const IMergeTreeReader & getMainReader() const { return *readers.main; } + + Readers releaseReaders() { return std::move(readers); } + + static Readers createReaders(const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); + static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + +private: + UInt64 estimateNumRows(const BlockSizeParams & params) const; + + /// Shared information required for reading. + InfoPtr info; + + /// Readers for data_part of this task. + /// May be reused and released to the next task. 
+ Readers readers; + + /// Range readers to read mark_ranges from data_part + RangeReaders range_readers; + + /// Ranges to read from data_part + MarkRanges mark_ranges; + + /// Used to satistfy preferred_block_size_bytes limitation + MergeTreeBlockSizePredictorPtr size_predictor; +}; + +using MergeTreeReadTaskPtr = std::unique_ptr; + +} diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index f65e66ff52d..a007249cf19 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -29,7 +29,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, - metadata_snapshot_, + storage_snapshot_, uncompressed_cache_, mark_cache_, mark_ranges_, @@ -112,15 +112,10 @@ void MergeTreeReaderCompact::initialize() compressed_data_buffer = non_cached_buffer.get(); } } - catch (const Exception & e) - { - if (!isRetryableException(e)) - data_part_info_for_read->reportBroken(); - throw; - } catch (...) { - data_part_info_for_read->reportBroken(); + if (!isRetryableException(std::current_exception())) + data_part_info_for_read->reportBroken(); throw; } } @@ -130,7 +125,7 @@ void MergeTreeReaderCompact::fillColumnPositions() size_t columns_num = columns_to_read.size(); column_positions.resize(columns_num); - read_only_offsets.resize(columns_num); + columns_for_offsets.resize(columns_num); for (size_t i = 0; i < columns_num; ++i) { @@ -149,20 +144,48 @@ void MergeTreeReaderCompact::fillColumnPositions() position.reset(); } + /// If array of Nested column is missing in part, + /// we have to read its offsets if they exist. if (!position && is_array) { - /// If array of Nested column is missing in part, - /// we have to read its offsets if they exist. - auto position_level = findColumnForOffsets(column_to_read); - if (position_level.has_value()) + NameAndTypePair column_to_read_with_subcolumns = column_to_read; + auto [name_in_storage, subcolumn_name] = Nested::splitName(column_to_read.name); + + /// If it is a part of Nested, we need to get the column from + /// storage metatadata which is converted to Nested type with subcolumns. + /// It is required for proper counting of shared streams. + if (!subcolumn_name.empty()) { - column_positions[i].emplace(position_level->first); - read_only_offsets[i].emplace(position_level->second); + /// If column is renamed get the new name from storage metadata. 
+ if (alter_conversions->columnHasNewName(name_in_storage)) + name_in_storage = alter_conversions->getColumnNewName(name_in_storage); + + if (!storage_columns_with_collected_nested) + { + auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); + auto storage_columns_list = Nested::collect(storage_snapshot->getColumns(options)); + storage_columns_with_collected_nested = ColumnsDescription(std::move(storage_columns_list)); + } + + column_to_read_with_subcolumns = storage_columns_with_collected_nested + ->getColumnOrSubcolumn( + GetColumnsOptions::All, + Nested::concatenateName(name_in_storage, subcolumn_name)); + } + + auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns); + + if (name_level_for_offsets.has_value()) + { + column_positions[i] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first); + columns_for_offsets[i] = name_level_for_offsets; partially_read_columns.insert(column_to_read.name); } } else + { column_positions[i] = std::move(position); + } } } @@ -203,7 +226,7 @@ size_t MergeTreeReaderCompact::readRows( auto & column = res_columns[pos]; size_t column_size_before_reading = column->size(); - readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); + readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, columns_for_offsets[pos]); size_t read_rows_in_column = column->size() - column_size_before_reading; if (read_rows_in_column != rows_to_read) @@ -211,18 +234,21 @@ size_t MergeTreeReaderCompact::readRows( "Cannot read all data in MergeTreeReaderCompact. Rows read: {}. Rows expected: {}.", read_rows_in_column, rows_to_read); } - catch (Exception & e) + catch (...) { - if (!isRetryableException(e)) + if (!isRetryableException(std::current_exception())) data_part_info_for_read->reportBroken(); /// Better diagnostics. - e.addMessage(getMessageForDiagnosticOfBrokenPart(from_mark, max_rows_to_read)); - throw; - } - catch (...) - { - data_part_info_for_read->reportBroken(); + try + { + rethrow_exception(std::current_exception()); + } + catch (Exception & e) + { + e.addMessage(getMessageForDiagnosticOfBrokenPart(from_mark, max_rows_to_read)); + } + throw; } } @@ -239,23 +265,37 @@ size_t MergeTreeReaderCompact::readRows( void MergeTreeReaderCompact::readData( const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read, - std::optional only_offsets_level) + ColumnNameLevel name_level_for_offsets) { const auto & [name, type] = name_and_type; + std::optional column_for_offsets; + + if (name_level_for_offsets.has_value()) + { + const auto & part_columns = data_part_info_for_read->getColumnsDescription(); + column_for_offsets = part_columns.getPhysical(name_level_for_offsets->first); + } adjustUpperBound(current_task_last_mark); /// Must go before seek. if (!isContinuousReading(from_mark, column_position)) seekToMark(from_mark, column_position); + /// If we read only offsets we have to read prefix anyway + /// to preserve correctness of serialization. 
+ auto buffer_getter_for_prefix = [&](const auto &) -> ReadBuffer * + { + return data_buffer; + }; + auto buffer_getter = [&](const ISerialization::SubstreamPath & substream_path) -> ReadBuffer * { /// Offset stream from another column could be read, in case of current /// column does not exists (see findColumnForOffsets() in /// MergeTreeReaderCompact::fillColumnPositions()) - bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - if (only_offsets_level.has_value()) + if (name_level_for_offsets.has_value()) { + bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (!is_offsets) return nullptr; @@ -275,7 +315,7 @@ void MergeTreeReaderCompact::readData( /// /// Here only_offsets_level is the level of the alternative stream, /// and substream_path.size() is the level of the current stream. - if (only_offsets_level.value() < ISerialization::getArrayLevel(substream_path)) + if (name_level_for_offsets->second < ISerialization::getArrayLevel(substream_path)) return nullptr; } @@ -283,22 +323,32 @@ void MergeTreeReaderCompact::readData( }; ISerialization::DeserializeBinaryBulkStatePtr state; + ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; + ISerialization::DeserializeBinaryBulkSettings deserialize_settings; - deserialize_settings.getter = buffer_getter; deserialize_settings.avg_value_size_hint = avg_value_size_hints[name]; if (name_and_type.isSubcolumn()) { - const auto & type_in_storage = name_and_type.getTypeInStorage(); - const auto & name_in_storage = name_and_type.getNameInStorage(); + NameAndTypePair name_type_in_storage{name_and_type.getNameInStorage(), name_and_type.getTypeInStorage()}; - auto serialization = getSerializationInPart({name_in_storage, type_in_storage}); - ColumnPtr temp_column = type_in_storage->createColumn(*serialization); + /// In case of reading onlys offset use the correct serialization for reading of the prefix + auto serialization = getSerializationInPart(name_type_in_storage); + ColumnPtr temp_column = name_type_in_storage.type->createColumn(*serialization); + if (column_for_offsets) + { + auto serialization_for_prefix = getSerializationInPart(*column_for_offsets); + + deserialize_settings.getter = buffer_getter_for_prefix; + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + } + + deserialize_settings.getter = buffer_getter; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); - auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); + auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); /// TODO: Avoid extra copying. 
if (column->empty()) @@ -308,13 +358,24 @@ void MergeTreeReaderCompact::readData( } else { + /// In case of reading only offsets use the correct serialization for reading the prefix auto serialization = getSerializationInPart(name_and_type); + + if (column_for_offsets) + { + auto serialization_for_prefix = getSerializationInPart(*column_for_offsets); + + deserialize_settings.getter = buffer_getter_for_prefix; + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + } + + deserialize_settings.getter = buffer_getter; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state, nullptr); } /// The buffer is left in inconsistent state after reading single offsets - if (only_offsets_level.has_value()) + if (name_level_for_offsets.has_value()) last_read_granule.reset(); else last_read_granule.emplace(from_mark, column_position); @@ -333,15 +394,10 @@ try seekToMark(all_mark_ranges.front().begin, 0); data_buffer->prefetch(priority); } -catch (const Exception & e) -{ - if (!isRetryableException(e)) - data_part_info_for_read->reportBroken(); - throw; -} catch (...) { - data_part_info_for_read->reportBroken(); + if (!isRetryableException(std::current_exception())) + data_part_info_for_read->reportBroken(); throw; } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index f180d7508f7..cf706526363 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,7 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -52,12 +52,19 @@ private: MergeTreeMarksLoader marks_loader; + /// Storage columns with collected separate arrays of Nested to columns of Nested type. + /// They maybe be needed for finding offsets of missed Nested columns in parts. + /// They are rarely used and are heavy to initialized, so we create them + /// only on demand and cache in this field. + std::optional storage_columns_with_collected_nested; + /// Positions of columns in part structure. using ColumnPositions = std::vector>; ColumnPositions column_positions; + /// Should we read full column or only it's offsets. /// Element of the vector is the level of the alternative stream. - std::vector> read_only_offsets; + std::vector columns_for_offsets; /// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream. std::optional last_right_offset; @@ -68,8 +75,8 @@ private: void seekToMark(size_t row_index, size_t column_index); void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, - size_t current_task_last_mark, size_t column_position, size_t rows_to_read, - std::optional only_offsets_level); + size_t current_task_last_mark, size_t column_position, + size_t rows_to_read, ColumnNameLevel name_level_for_offsets); /// Returns maximal value of granule size in compressed file from @mark_ranges. /// This value is used as size of read buffer. 
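The Nested handling added to `MergeTreeReaderCompact::fillColumnPositions` above hinges on splitting a subcolumn name such as `n.a` into its name in storage and its subcolumn name before looking for a sibling column whose offsets can be reused. A toy version of that split with plain `std::string` (it mimics the idea of `Nested::splitName`, not its exact behaviour):

#include <string>
#include <utility>

std::pair<std::string, std::string> splitNestedName(const std::string & column_name)
{
    const auto pos = column_name.find('.');
    if (pos == std::string::npos)
        return {column_name, ""};           /// not a Nested subcolumn

    return {column_name.substr(0, pos),     /// name in storage, e.g. "n"
            column_name.substr(pos + 1)};   /// subcolumn name, e.g. "a"
}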
@@ -84,7 +91,6 @@ private: ReadBufferFromFileBase::ProfileCallback profile_callback; clockid_t clock_type; - bool initialized = false; }; diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index fed8032fb17..bacd86511f5 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -19,13 +19,13 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) : IMergeTreeReader( data_part_info_for_read_, columns_, - metadata_snapshot_, + storage_snapshot_, nullptr, nullptr, mark_ranges_, @@ -42,7 +42,7 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( { if (auto offsets_position = findColumnForOffsets(column_to_read)) { - positions_for_offsets[column_to_read.name] = offsets_position->first; + positions_for_offsets[column_to_read.name] = *data_part_info_for_read->getColumnPosition(offsets_position->first); partially_read_columns.insert(column_to_read.name); } } diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h index cb67bc46eae..e26a98f0916 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -18,7 +18,7 @@ public: MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 140fb6da5df..29924d06f68 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,7 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -35,7 +35,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, - metadata_snapshot_, + storage_snapshot_, uncompressed_cache_, mark_cache_, mark_ranges_, @@ -47,15 +47,10 @@ MergeTreeReaderWide::MergeTreeReaderWide( for (size_t i = 0; i < columns_to_read.size(); ++i) addStreams(columns_to_read[i], serializations[i], profile_callback_, clock_type_); } - catch (const Exception & e) - { - if (!isRetryableException(e)) - data_part_info_for_read->reportBroken(); - throw; - } catch (...) { - data_part_info_for_read->reportBroken(); + if (!isRetryableException(std::current_exception())) + data_part_info_for_read->reportBroken(); throw; } } @@ -78,15 +73,10 @@ void MergeTreeReaderWide::prefetchBeginOfRange(Priority priority) /// of range only once so there is no such problem. /// 4. continue_reading == false, as we haven't read anything yet. } - catch (const Exception & e) - { - if (!isRetryableException(e)) - data_part_info_for_read->reportBroken(); - throw; - } catch (...) 
{ - data_part_info_for_read->reportBroken(); + if (!isRetryableException(std::current_exception())) + data_part_info_for_read->reportBroken(); throw; } } @@ -184,18 +174,21 @@ size_t MergeTreeReaderWide::readRows( /// In particular, even if for some streams there are no rows to be read, /// you must ensure that no seeks are skipped and at this point they all point to to_mark. } - catch (Exception & e) + catch (...) { - if (!isRetryableException(e)) + if (!isRetryableException(std::current_exception())) data_part_info_for_read->reportBroken(); /// Better diagnostics. - e.addMessage(getMessageForDiagnosticOfBrokenPart(from_mark, max_rows_to_read)); - throw; - } - catch (...) - { - data_part_info_for_read->reportBroken(); + try + { + rethrow_exception(std::current_exception()); + } + catch (Exception & e) + { + e.addMessage(getMessageForDiagnosticOfBrokenPart(from_mark, max_rows_to_read)); + } + throw; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index c31b1baf32e..2a850cc2814 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,7 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp deleted file mode 100644 index bb515bcd10f..00000000000 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include -#include -#include "Storages/MergeTree/MergeTreeBaseSelectProcessor.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int MEMORY_LIMIT_EXCEEDED; -} - -bool MergeTreeReverseSelectAlgorithm::getNewTaskImpl() -try -{ - if (pool) - return getNewTaskParallelReplicas(); - else - return getNewTaskOrdinaryReading(); -} -catch (...) -{ - /// Suspicion of the broken part. A part is added to the queue for verification. - if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) - storage.reportBrokenPart(data_part); - throw; -} - - -bool MergeTreeReverseSelectAlgorithm::getNewTaskOrdinaryReading() -{ - if (chunks.empty() && all_mark_ranges.empty()) - return false; - - /// We have some blocks to return in buffer. - /// Return true to continue reading, but actually don't create a task. - if (all_mark_ranges.empty()) - return true; - - if (!reader) - initializeReaders(); - - /// Read ranges from right to left. - MarkRanges mark_ranges_for_task{std::move(all_mark_ranges.back())}; - all_mark_ranges.pop_back(); - - auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr - : getSizePredictor(data_part, task_columns, sample_block); - - task = std::make_unique( - data_part, - alter_conversions, - mark_ranges_for_task, - part_index_in_query, - column_name_set, - task_columns, - std::move(size_predictor)); - - return true; - -} - -bool MergeTreeReverseSelectAlgorithm::getNewTaskParallelReplicas() -{ - if (chunks.empty() && no_more_tasks) - return false; - - /// We have some blocks to return in buffer. - /// Return true to continue reading, but actually don't create a task. 
- if (no_more_tasks) - return true; - - if (!reader) - initializeReaders(); - - auto description = RangesInDataPartDescription{ - .info = data_part->info, - /// We just ignore all the distribution done before - /// Everything will be done on coordinator side - .ranges = {}, - }; - - auto mark_ranges_for_task = pool->getNewTask(description); - if (mark_ranges_for_task.empty()) - { - /// If we have chunks in buffer - return true to continue reading from them - return !chunks.empty(); - } - - auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr - : getSizePredictor(data_part, task_columns, sample_block); - - task = std::make_unique( - data_part, - alter_conversions, - mark_ranges_for_task, - part_index_in_query, - column_name_set, - task_columns, - std::move(size_predictor)); - - return true; -} - -MergeTreeReverseSelectAlgorithm::BlockAndProgress MergeTreeReverseSelectAlgorithm::readFromPart() -{ - BlockAndProgress res; - - if (!chunks.empty()) - { - res = std::move(chunks.back()); - chunks.pop_back(); - return res; - } - - if (!task->range_reader.isInitialized()) - initializeRangeReaders(*task); - - while (!task->isFinished()) - { - auto chunk = readFromPartImpl(); - chunks.push_back(std::move(chunk)); - } - - if (chunks.empty()) - return {}; - - res = std::move(chunks.back()); - chunks.pop_back(); - - return res; -} - -} diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h deleted file mode 100644 index 618f2855695..00000000000 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once -#include -#include - - -namespace DB -{ - -/// Used to read data from single part with select query -/// in reverse order of primary key. -/// Cares about PREWHERE, virtual columns, indexes etc. -/// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. -class MergeTreeReverseSelectAlgorithm final : public MergeTreeSelectAlgorithm -{ -public: - template - explicit MergeTreeReverseSelectAlgorithm(Args &&... args) - : MergeTreeSelectAlgorithm{std::forward(args)...} - { - LOG_TRACE(log, "Reading {} ranges in reverse order from part {}, approx. 
{} rows starting from {}", - all_mark_ranges.size(), data_part->name, total_rows, - data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin)); - } - - String getName() const override { return "MergeTreeReverse"; } - -private: - bool getNewTaskImpl() override; - void finalizeNewTask() override {} - - bool getNewTaskParallelReplicas(); - bool getNewTaskOrdinaryReading(); - - BlockAndProgress readFromPart() override; - - std::vector chunks; - - /// Used for parallel replicas - bool no_more_tasks{false}; - - Poco::Logger * log = &Poco::Logger::get("MergeTreeReverseSelectProcessor"); -}; - -} diff --git a/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp new file mode 100644 index 00000000000..8bc4377cffb --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp @@ -0,0 +1,87 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +MergeTreeThreadSelectAlgorithm::TaskResult MergeTreeThreadSelectAlgorithm::getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) +{ + TaskResult res; + res.first = pool.getTask(thread_idx, previous_task); + res.second = !!res.first; + return res; +} + +MergeTreeReadTask::BlockAndProgress MergeTreeThreadSelectAlgorithm::readFromTask(MergeTreeReadTask * task, const MergeTreeReadTask::BlockSizeParams & params) +{ + if (!task) + return {}; + + return task->read(params); +} + +IMergeTreeSelectAlgorithm::TaskResult MergeTreeInOrderSelectAlgorithm::getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) +{ + if (!pool.preservesOrderOfRanges()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "MergeTreeInOrderSelectAlgorithm requires read pool that preserves order of ranges, got: {}", pool.getName()); + + TaskResult res; + res.first = pool.getTask(part_idx, previous_task); + res.second = !!res.first; + return res; +} + +MergeTreeReadTask::BlockAndProgress MergeTreeInOrderSelectAlgorithm::readFromTask(MergeTreeReadTask * task, const BlockSizeParams & params) +{ + if (!task) + return {}; + + return task->read(params); +} + +IMergeTreeSelectAlgorithm::TaskResult MergeTreeInReverseOrderSelectAlgorithm::getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) +{ + if (!pool.preservesOrderOfRanges()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "MergeTreeInReverseOrderSelectAlgorithm requires read pool that preserves order of ranges, got: {}", pool.getName()); + + TaskResult res; + res.first = pool.getTask(part_idx, previous_task); + /// We may have some chunks to return in buffer. + /// Set continue_reading to true but actually don't create a new task. 
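A minimal stand-alone sketch of the buffered reverse read this algorithm implements (the continue_reading flag set just below, combined with the chunks buffer): chunks of a task are read forward into a buffer and then handed out from the back. Standard library only; the Task and Chunk types here are made up, not ClickHouse's.

// Illustrative only -- not part of the patch.
#include <iostream>
#include <string>
#include <vector>

struct Chunk { std::string payload; };

struct Task
{
    std::vector<std::string> granules;
    size_t pos = 0;
    bool isFinished() const { return pos == granules.size(); }
    Chunk read() { return Chunk{granules[pos++]}; }
};

int main()
{
    Task task{{"granule 0", "granule 1", "granule 2"}};
    std::vector<Chunk> chunks;

    // getNewTask(): continue_reading stays true while there is a task or buffered chunks.
    bool continue_reading = !task.isFinished() || !chunks.empty();

    while (continue_reading)
    {
        // readFromTask(): drain the task forward once, then serve chunks from the back.
        if (chunks.empty())
            while (!task.isFinished())
                chunks.push_back(task.read());

        Chunk res = std::move(chunks.back());
        chunks.pop_back();
        std::cout << res.payload << "\n";   // prints granule 2, then 1, then 0

        continue_reading = !task.isFinished() || !chunks.empty();
    }
}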
+ res.second = !!res.first || !chunks.empty(); + return res; +} + +MergeTreeReadTask::BlockAndProgress MergeTreeInReverseOrderSelectAlgorithm::readFromTask(MergeTreeReadTask * task, const BlockSizeParams & params) +{ + MergeTreeReadTask::BlockAndProgress res; + + if (!chunks.empty()) + { + res = std::move(chunks.back()); + chunks.pop_back(); + return res; + } + + if (!task) + return {}; + + while (!task->isFinished()) + chunks.push_back(task->read(params)); + + if (chunks.empty()) + return {}; + + res = std::move(chunks.back()); + chunks.pop_back(); + return res; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h new file mode 100644 index 00000000000..a6254a90687 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class IMergeTreeReadPool; + +class IMergeTreeSelectAlgorithm : private boost::noncopyable +{ +public: + /// The pair of {task, continue_reading}. + using TaskResult = std::pair; + using BlockSizeParams = MergeTreeReadTask::BlockSizeParams; + + virtual ~IMergeTreeSelectAlgorithm() = default; + + virtual String getName() const = 0; + virtual TaskResult getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) = 0; + virtual MergeTreeReadTask::BlockAndProgress readFromTask(MergeTreeReadTask * task, const BlockSizeParams & params) = 0; +}; + +using MergeTreeSelectAlgorithmPtr = std::unique_ptr; + +class MergeTreeThreadSelectAlgorithm : public IMergeTreeSelectAlgorithm +{ +public: + explicit MergeTreeThreadSelectAlgorithm(size_t thread_idx_) : thread_idx(thread_idx_) {} + String getName() const override { return "Thread"; } + TaskResult getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) override; + MergeTreeReadTask::BlockAndProgress readFromTask(MergeTreeReadTask * task, const BlockSizeParams & params) override; + +private: + const size_t thread_idx; +}; + +class MergeTreeInOrderSelectAlgorithm : public IMergeTreeSelectAlgorithm +{ +public: + explicit MergeTreeInOrderSelectAlgorithm(size_t part_idx_) : part_idx(part_idx_) {} + String getName() const override { return "InOrder"; } + TaskResult getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) override; + MergeTreeReadTask::BlockAndProgress readFromTask(MergeTreeReadTask * task, const BlockSizeParams & params) override; + +private: + const size_t part_idx; +}; + +class MergeTreeInReverseOrderSelectAlgorithm : public IMergeTreeSelectAlgorithm +{ +public: + explicit MergeTreeInReverseOrderSelectAlgorithm(size_t part_idx_) : part_idx(part_idx_) {} + String getName() const override { return "InReverseOrder"; } + TaskResult getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) override; + MergeTreeReadTask::BlockAndProgress readFromTask(MergeTreeReadTask * task, const BlockSizeParams & params) override; + +private: + const size_t part_idx; + std::vector chunks; +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index ce4ba69c08c..975fad1ab6b 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -1,85 +1,435 @@ #include -#include -#include -#include -#include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { -MergeTreeSelectAlgorithm::MergeTreeSelectAlgorithm( +namespace 
ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; + extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; +} + +static void injectNonConstVirtualColumns( + size_t rows, + Block & block, + const Names & virtual_columns); + +static void injectPartConstVirtualColumns( + size_t rows, + Block & block, + MergeTreeReadTask * task, + const DataTypePtr & partition_value_type, + const Names & virtual_columns); + +MergeTreeSelectProcessor::MergeTreeSelectProcessor( + MergeTreeReadPoolPtr pool_, + MergeTreeSelectAlgorithmPtr algorithm_, const MergeTreeData & storage_, - const StorageSnapshotPtr & storage_snapshot_, - const MergeTreeData::DataPartPtr & owned_data_part_, - const AlterConversionsPtr & alter_conversions_, - UInt64 max_block_size_rows_, - size_t preferred_block_size_bytes_, - size_t preferred_max_column_in_block_size_bytes_, - Names required_columns_, - MarkRanges mark_ranges_, - bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, + const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & reader_settings_, - MergeTreeInOrderReadPoolParallelReplicasPtr pool_, - const Names & virt_column_names_, - size_t part_index_in_query_, - bool has_limit_below_one_block_) - : IMergeTreeSelectAlgorithm{ - storage_snapshot_->getSampleBlockForColumns(required_columns_), - storage_, storage_snapshot_, prewhere_info_, actions_settings_, max_block_size_rows_, - preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, - reader_settings_, use_uncompressed_cache_, virt_column_names_}, - required_columns{std::move(required_columns_)}, - data_part{owned_data_part_}, - alter_conversions(alter_conversions_), - sample_block(storage_snapshot_->metadata->getSampleBlock()), - all_mark_ranges(std::move(mark_ranges_)), - part_index_in_query(part_index_in_query_), - has_limit_below_one_block(has_limit_below_one_block_), - pool(pool_), - total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges)) + const Names & virt_column_names_) + : pool(std::move(pool_)) + , algorithm(std::move(algorithm_)) + , prewhere_info(prewhere_info_) + , actions_settings(actions_settings_) + , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) + , reader_settings(reader_settings_) + , block_size_params(block_size_params_) + , virt_column_names(virt_column_names_) + , partition_value_type(storage_.getPartitionValueType()) { - ordered_names = header_without_const_virtual_columns.getNames(); + if (reader_settings.apply_deleted_mask) + { + PrewhereExprStep step + { + .type = PrewhereExprStep::Filter, + .actions = nullptr, + .filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name, + .remove_filter_column = true, + .need_filter = true, + .perform_alter_conversions = true, + }; + + lightweight_delete_filter_step = std::make_shared(std::move(step)); + } + + header_without_const_virtual_columns = applyPrewhereActions(pool->getHeader(), prewhere_info); + size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); + injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); + + for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) + non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); + + result_header = 
header_without_const_virtual_columns; + injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); + + if (!prewhere_actions.steps.empty()) + LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions()); + + if (prewhere_info) + LOG_TEST(log, "Original PREWHERE DAG:\n{}\nPREWHERE actions:\n{}", + (prewhere_info->prewhere_actions ? prewhere_info->prewhere_actions->dumpDAG(): std::string("")), + (!prewhere_actions.steps.empty() ? prewhere_actions.dump() : std::string(""))); } -void MergeTreeSelectAlgorithm::initializeReaders() +String MergeTreeSelectProcessor::getName() const { - LoadedMergeTreeDataPartInfoForReader part_info(data_part, alter_conversions); - - task_columns = getReadTaskColumns( - part_info, storage_snapshot, - required_columns, virt_column_names, - prewhere_info, - actions_settings, reader_settings, /*with_subcolumns=*/ true); - - /// Will be used to distinguish between PREWHERE and WHERE columns when applying filter - const auto & column_names = task_columns.columns.getNames(); - column_name_set = NameSet{column_names.begin(), column_names.end()}; - - if (use_uncompressed_cache) - owned_uncompressed_cache = storage.getContext()->getUncompressedCache(); - - owned_mark_cache = storage.getContext()->getMarkCache(); - - initializeMergeTreeReadersForPart( - data_part, alter_conversions, task_columns, - storage_snapshot->getMetadataForQuery(), all_mark_ranges, {}, {}); + return fmt::format("MergeTreeSelect(pool: {}, algorithm: {})", pool->getName(), algorithm->getName()); } +bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere); -void MergeTreeSelectAlgorithm::finish() +PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps) { - /** Close the files (before destroying the object). - * When many sources are created, but simultaneously reading only a few of them, - * buffers don't waste memory. 
- */ - reader.reset(); - pre_reader_for_step.clear(); - data_part.reset(); + PrewhereExprInfo prewhere_actions; + if (prewhere_info) + { + if (prewhere_info->row_level_filter) + { + PrewhereExprStep row_level_filter_step + { + .type = PrewhereExprStep::Filter, + .actions = std::make_shared(prewhere_info->row_level_filter, actions_settings), + .filter_column_name = prewhere_info->row_level_column_name, + .remove_filter_column = true, + .need_filter = true, + .perform_alter_conversions = true, + }; + + prewhere_actions.steps.emplace_back(std::make_shared(std::move(row_level_filter_step))); + } + + if (!enable_multiple_prewhere_read_steps || + !tryBuildPrewhereSteps(prewhere_info, actions_settings, prewhere_actions)) + { + PrewhereExprStep prewhere_step + { + .type = PrewhereExprStep::Filter, + .actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings), + .filter_column_name = prewhere_info->prewhere_column_name, + .remove_filter_column = prewhere_info->remove_prewhere_column, + .need_filter = prewhere_info->need_filter, + .perform_alter_conversions = true, + }; + + prewhere_actions.steps.emplace_back(std::make_shared(std::move(prewhere_step))); + } + } + + return prewhere_actions; } -MergeTreeSelectAlgorithm::~MergeTreeSelectAlgorithm() = default; +ChunkAndProgress MergeTreeSelectProcessor::read() +{ + while (!is_cancelled) + { + try + { + bool continue_reading = true; + if (!task || task->isFinished()) + std::tie(task, continue_reading) = algorithm->getNewTask(*pool, task.get()); + + if (!continue_reading) + break; + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::QUERY_WAS_CANCELLED) + break; + throw; + } + + if (task && !task->getMainRangeReader().isInitialized()) + initializeRangeReaders(); + + auto res = algorithm->readFromTask(task.get(), block_size_params); + + if (res.row_count) + { + injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names); + + /// Reorder the columns according to result_header + Columns ordered_columns; + ordered_columns.reserve(result_header.columns()); + for (size_t i = 0; i < result_header.columns(); ++i) + { + auto name = result_header.getByPosition(i).name; + ordered_columns.push_back(res.block.getByName(name).column); + } + + return ChunkAndProgress{ + .chunk = Chunk(ordered_columns, res.row_count), + .num_read_rows = res.num_read_rows, + .num_read_bytes = res.num_read_bytes, + .is_finished = false}; + } + else + { + return {Chunk(), res.num_read_rows, res.num_read_bytes, false}; + } + } + + return {Chunk(), 0, 0, true}; +} + +void MergeTreeSelectProcessor::initializeRangeReaders() +{ + PrewhereExprInfo all_prewhere_actions; + if (lightweight_delete_filter_step && task->getInfo().data_part->hasLightweightDelete()) + all_prewhere_actions.steps.push_back(lightweight_delete_filter_step); + + for (const auto & step : prewhere_actions.steps) + all_prewhere_actions.steps.push_back(step); + + task->initializeRangeReaders(all_prewhere_actions, non_const_virtual_column_names); +} + + +namespace +{ + struct VirtualColumnsInserter + { + explicit VirtualColumnsInserter(Block & block_) : block(block_) {} + + bool columnExists(const String & name) const { return block.has(name); } + + void insertUInt8Column(const ColumnPtr & column, const String & name) + { + block.insert({column, std::make_shared(), name}); + } + + void insertUInt64Column(const ColumnPtr & column, const String & name) + { + block.insert({column, std::make_shared(), name}); + } + + void insertUUIDColumn(const ColumnPtr & 
column, const String & name) + { + block.insert({column, std::make_shared(), name}); + } + + void insertLowCardinalityColumn(const ColumnPtr & column, const String & name) + { + block.insert({column, std::make_shared(std::make_shared()), name}); + } + + void insertPartitionValueColumn( + size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name) + { + ColumnPtr column; + if (rows) + column = partition_value_type->createColumnConst(rows, Tuple(partition_value.begin(), partition_value.end())) + ->convertToFullColumnIfConst(); + else + column = partition_value_type->createColumn(); + + block.insert({column, partition_value_type, name}); + } + + Block & block; + }; +} + +/// Adds virtual columns that are not const for all rows +static void injectNonConstVirtualColumns( + size_t rows, + Block & block, + const Names & virtual_columns) +{ + VirtualColumnsInserter inserter(block); + for (const auto & virtual_column_name : virtual_columns) + { + if (virtual_column_name == "_part_offset") + { + if (!rows) + { + inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); + } + else + { + if (!inserter.columnExists(virtual_column_name)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column {} must have been filled part reader", + virtual_column_name); + } + } + + if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + { + /// If _row_exists column isn't present in the part then fill it here with 1s + ColumnPtr column; + if (rows) + column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); + else + column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); + + inserter.insertUInt8Column(column, virtual_column_name); + } + } +} + +/// Adds virtual columns that are const for the whole part +static void injectPartConstVirtualColumns( + size_t rows, + Block & block, + MergeTreeReadTask * task, + const DataTypePtr & partition_value_type, + const Names & virtual_columns) +{ + VirtualColumnsInserter inserter(block); + /// add virtual columns + /// Except _sample_factor, which is added from the outside. 
+ if (!virtual_columns.empty()) + { + if (unlikely(rows && !task)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); + + const IMergeTreeDataPart * part = nullptr; + + if (rows) + { + part = task->getInfo().data_part.get(); + if (part->isProjectionPart()) + part = part->getParentPart(); + } + + for (const auto & virtual_column_name : virtual_columns) + { + if (virtual_column_name == "_part") + { + ColumnPtr column; + if (rows) + column = DataTypeLowCardinality{std::make_shared()} + .createColumnConst(rows, part->name) + ->convertToFullColumnIfConst(); + else + column = DataTypeLowCardinality{std::make_shared()}.createColumn(); + + inserter.insertLowCardinalityColumn(column, virtual_column_name); + } + else if (virtual_column_name == "_part_index") + { + ColumnPtr column; + if (rows) + column = DataTypeUInt64().createColumnConst(rows, task->getInfo().part_index_in_query)->convertToFullColumnIfConst(); + else + column = DataTypeUInt64().createColumn(); + + inserter.insertUInt64Column(column, virtual_column_name); + } + else if (virtual_column_name == "_part_uuid") + { + ColumnPtr column; + if (rows) + column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); + else + column = DataTypeUUID().createColumn(); + + inserter.insertUUIDColumn(column, virtual_column_name); + } + else if (virtual_column_name == "_partition_id") + { + ColumnPtr column; + if (rows) + column = DataTypeLowCardinality{std::make_shared()} + .createColumnConst(rows, part->info.partition_id) + ->convertToFullColumnIfConst(); + else + column = DataTypeLowCardinality{std::make_shared()}.createColumn(); + + inserter.insertLowCardinalityColumn(column, virtual_column_name); + } + else if (virtual_column_name == "_partition_value") + { + if (rows) + inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); + else + inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); + } + } + } +} + +void MergeTreeSelectProcessor::injectVirtualColumns( + Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns) +{ + /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves. 
+ /// Note that the order is important: virtual columns filled by the range reader must go first + injectNonConstVirtualColumns(row_count, block, virtual_columns); + injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns); +} + +Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) +{ + if (prewhere_info) + { + if (prewhere_info->row_level_filter) + { + block = prewhere_info->row_level_filter->updateHeader(std::move(block)); + auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); + if (!row_level_column.type->canBeUsedInBooleanContext()) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + row_level_column.type->getName()); + } + + block.erase(prewhere_info->row_level_column_name); + } + + if (prewhere_info->prewhere_actions) + { + block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); + + auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); + if (!prewhere_column.type->canBeUsedInBooleanContext()) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + prewhere_column.type->getName()); + } + + if (prewhere_info->remove_prewhere_column) + { + block.erase(prewhere_info->prewhere_column_name); + } + else if (prewhere_info->need_filter) + { + WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); + + if (which.isNativeInt() || which.isNativeUInt()) + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); + else if (which.isFloat()) + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Illegal type {} of column for filter", + prewhere_column.type->getName()); + } + } + } + + return block; +} + +Block MergeTreeSelectProcessor::transformHeader( + Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) +{ + auto transformed = applyPrewhereActions(std::move(block), prewhere_info); + injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns); + return transformed; +} } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 5f4f49bf075..a5178cda55d 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -1,78 +1,118 @@ #pragma once -#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace DB { +struct PrewhereExprInfo; -/// Used to read data from single part with select query -/// Cares about PREWHERE, virtual columns, indexes etc. -/// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. 
-class MergeTreeSelectAlgorithm : public IMergeTreeSelectAlgorithm +struct ChunkAndProgress { -public: - MergeTreeSelectAlgorithm( - const MergeTreeData & storage, - const StorageSnapshotPtr & storage_snapshot_, - const MergeTreeData::DataPartPtr & owned_data_part_, - const AlterConversionsPtr & alter_conversions_, - UInt64 max_block_size_rows, - size_t preferred_block_size_bytes, - size_t preferred_max_column_in_block_size_bytes, - Names required_columns_, - MarkRanges mark_ranges, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const ExpressionActionsSettings & actions_settings_, - const MergeTreeReaderSettings & reader_settings, - MergeTreeInOrderReadPoolParallelReplicasPtr pool_, - const Names & virt_column_names = {}, - size_t part_index_in_query_ = 0, - bool has_limit_below_one_block_ = false); - - ~MergeTreeSelectAlgorithm() override; - -protected: - /// Defer initialization from constructor, because it may be heavy - /// and it's better to do it lazily in `getNewTaskImpl`, which is executing in parallel. - void initializeReaders(); - void finish() final; - - /// Used by Task - Names required_columns; - /// Names from header. Used in order to order columns in read blocks. - Names ordered_names; - NameSet column_name_set; - - MergeTreeReadTaskColumns task_columns; - - /// Data part will not be removed if the pointer owns it - MergeTreeData::DataPartPtr data_part; - - /// Alter converversionss that should be applied on-fly for part. - AlterConversionsPtr alter_conversions; - - /// Cache getSampleBlock call, which might be heavy. - Block sample_block; - - /// Mark ranges we should read (in ascending order) - MarkRanges all_mark_ranges; - /// Value of _part_index virtual column (used only in SelectExecutor) - size_t part_index_in_query = 0; - /// If true, every task will be created only with one range. - /// It reduces amount of read data for queries with small LIMIT. - bool has_limit_below_one_block = false; - - /// Pool for reading in order - MergeTreeInOrderReadPoolParallelReplicasPtr pool; - - size_t total_rows = 0; + Chunk chunk; + size_t num_read_rows = 0; + size_t num_read_bytes = 0; + /// Explicitly indicate that we have read all data. + /// This is needed to occasionally return empty chunk to indicate the progress while the rows are filtered out in PREWHERE. + bool is_finished = false; }; +struct ParallelReadingExtension +{ + MergeTreeAllRangesCallback all_callback; + MergeTreeReadTaskCallback callback; + size_t count_participating_replicas{0}; + size_t number_of_current_replica{0}; + /// This is needed to estimate the number of bytes + /// between a pair of marks to perform one request + /// over the network for a 1Gb of data. 
+ Names columns_to_read; +}; + +/// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm +class MergeTreeSelectProcessor : private boost::noncopyable +{ +public: + MergeTreeSelectProcessor( + MergeTreeReadPoolPtr pool_, + MergeTreeSelectAlgorithmPtr algorithm_, + const MergeTreeData & storage_, + const PrewhereInfoPtr & prewhere_info_, + const ExpressionActionsSettings & actions_settings_, + const MergeTreeReadTask::BlockSizeParams & block_size_params_, + const MergeTreeReaderSettings & reader_settings_, + const Names & virt_column_names_); + + String getName() const; + + static Block transformHeader( + Block block, + const PrewhereInfoPtr & prewhere_info, + const DataTypePtr & partition_value_type, + const Names & virtual_columns); + + Block getHeader() const { return result_header; } + + ChunkAndProgress read(); + + void cancel() { is_cancelled = true; } + + const MergeTreeReaderSettings & getSettings() const { return reader_settings; } + + static PrewhereExprInfo getPrewhereActions( + PrewhereInfoPtr prewhere_info, + const ExpressionActionsSettings & actions_settings, + bool enable_multiple_prewhere_read_steps); + +private: + /// This struct allow to return block with no columns but with non-zero number of rows similar to Chunk + struct BlockAndProgress + { + Block block; + size_t row_count = 0; + size_t num_read_rows = 0; + size_t num_read_bytes = 0; + }; + + /// Used for filling header with no rows as well as block with data + static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns); + static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); + + /// Sets up range readers corresponding to data readers + void initializeRangeReaders(); + + const MergeTreeReadPoolPtr pool; + const MergeTreeSelectAlgorithmPtr algorithm; + + const PrewhereInfoPtr prewhere_info; + const ExpressionActionsSettings actions_settings; + const PrewhereExprInfo prewhere_actions; + + const MergeTreeReaderSettings reader_settings; + const MergeTreeReadTask::BlockSizeParams block_size_params; + const Names virt_column_names; + const DataTypePtr partition_value_type; + + /// Current task to read from. + MergeTreeReadTaskPtr task; + /// This step is added when the part has lightweight delete mask + PrewhereExprStepPtr lightweight_delete_filter_step; + /// These columns will be filled by the merge tree range reader + Names non_const_virtual_column_names; + /// This header is used for chunks from readFromPart(). + Block header_without_const_virtual_columns; + /// A result of getHeader(). A chunk which this header is returned from read(). 
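A minimal stand-alone sketch of how a caller might consume the ChunkAndProgress contract declared above: an empty chunk may still carry read progress (for example when PREWHERE filters out every row), and is_finished is what ends the loop. The struct and numbers below are simplified stand-ins, not the real types.

// Illustrative only -- not part of the patch.
#include <cstddef>
#include <iostream>
#include <vector>

struct ChunkAndProgress
{
    std::vector<int> rows;        // stands in for Chunk
    size_t num_read_rows = 0;
    size_t num_read_bytes = 0;
    bool is_finished = false;
};

int main()
{
    // Pretend source: one real chunk, one progress-only chunk, then finished.
    std::vector<ChunkAndProgress> source = {
        {{1, 2, 3}, 3, 24, false},
        {{},        5, 40, false},   // all rows filtered out, progress only
        {{},        0, 0,  true},
    };

    size_t total_rows = 0, total_bytes = 0, next = 0;
    while (true)
    {
        ChunkAndProgress res = source[next++];
        total_rows += res.num_read_rows;
        total_bytes += res.num_read_bytes;

        if (!res.rows.empty())
            std::cout << "got chunk with " << res.rows.size() << " rows\n";

        if (res.is_finished)
            break;
    }
    std::cout << "progress: " << total_rows << " rows, " << total_bytes << " bytes read\n";
}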
+ Block result_header; + + Poco::Logger * log = &Poco::Logger::get("MergeTreeSelectProcessor"); + std::atomic is_cancelled{false}; +}; + +using MergeTreeSelectProcessorPtr = std::unique_ptr; + } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 5a6d59bf0be..8a9faa5cee4 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -138,6 +138,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( ReadSettings read_settings; if (read_with_direct_io) read_settings.direct_io_threshold = 1; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; MergeTreeReaderSettings reader_settings = { @@ -150,7 +151,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); reader = data_part->getReader( - columns_for_reader, storage_snapshot->metadata, + columns_for_reader, storage_snapshot, *mark_ranges, /* uncompressed_cache = */ nullptr, mark_cache.get(), alter_conversions, reader_settings, {}, {}); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 6df841059b9..1906f130101 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -141,6 +141,18 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const background_pool_tasks); } + if (number_of_free_entries_in_pool_to_execute_optimize_entire_partition > background_pool_tasks) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'number_of_free_entries_in_pool_to_execute_optimize_entire_partition' setting" + " ({}) (default values are defined in section of config.xml" + " or the value can be specified per table in SETTINGS section of CREATE TABLE query)" + " is greater than the value of 'background_pool_size'*'background_merges_mutations_concurrency_ratio'" + " ({}) (the value is defined in users.xml for default profile)." + " This indicates incorrect configuration because the maximum size of merge will be always lowered.", + number_of_free_entries_in_pool_to_execute_optimize_entire_partition, + background_pool_tasks); + } + // Zero index_granularity is nonsensical. if (index_granularity < 1) { diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index d986ea1d281..4f36da048c2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -65,6 +65,7 @@ struct Settings; M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they remained intouched for a specified by this setting period of time.", 0) \ M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \ M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ + M(UInt64, number_of_free_entries_in_pool_to_execute_optimize_entire_partition, 25, "When there is less than specified number of free entries in pool, do not try to execute optimize entire partition with a merge (this merge is created when set min_age_to_force_merge_seconds > 0 and min_age_to_force_merge_on_partition_only = true). 
This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \ M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ M(CleanDeletedRows, clean_deleted_rows, CleanDeletedRows::Never, "Is the Replicated Merge cleanup has to be done automatically at each merge or manually (possible values are 'Always'/'Never' (default))", 0) \ @@ -119,6 +120,7 @@ struct Settings; M(Bool, detach_not_byte_identical_parts, false, "Do not remove non byte-idential parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ + M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown table will wait for required amount time for unique parts (exist only on current replica) to be fetched by other replicas (0 means disabled).", 0) \ \ /** Check delay of replicas settings. */ \ M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \ @@ -167,7 +169,6 @@ struct Settings; /** Experimental/work in progress feature. Unsafe for production. */ \ M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. Does not take into account sharding expressions.", 0) \ M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \ - M(Bool, use_metadata_cache, false, "Experimental feature to speed up parts loading process by using MergeTree metadata cache", 0) \ M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \ M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ @@ -198,6 +199,7 @@ struct Settings; M(Bool, in_memory_parts_insert_sync, false, "Obsolete setting, does nothing.", 0) \ M(MaxThreads, max_part_loading_threads, 0, "Obsolete setting, does nothing.", 0) \ M(MaxThreads, max_part_removal_threads, 0, "Obsolete setting, does nothing.", 0) \ + M(Bool, use_metadata_cache, false, "Obsolete setting, does nothing.", 0) \ /// Settings that should not change after the creation of a table. /// NOLINTNEXTLINE diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index 69fbdd5a64d..a450505f7a8 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -24,7 +24,7 @@ struct MergeTreeSource::AsyncReadingState /// which can be called from background thread. 
/// Invariant: /// * background thread changes status InProgress -> IsFinished - /// * (status == InProgress) => (MergeTreeBaseSelectProcessor is alive) + /// * (status == InProgress) => (MergeTreeSelectProcessor is alive) void setResult(ChunkAndProgress chunk_) { @@ -118,7 +118,7 @@ struct MergeTreeSource::AsyncReadingState /// (executing thread) (bg pool thread) /// Control::finish() /// stage = Stage::IsFinished; - /// ~MergeTreeBaseSelectProcessor() + /// ~MergeTreeSelectProcessor() /// ~AsyncReadingState() /// control->stage != Stage::InProgress /// ~EventFD() @@ -133,12 +133,12 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectAlgorithmPtr algorithm_) - : ISource(algorithm_->getHeader()) - , algorithm(std::move(algorithm_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) + : ISource(processor_->getHeader()) + , processor(std::move(processor_)) { #if defined(OS_LINUX) - if (algorithm->getSettings().use_asynchronous_read_from_pool) + if (processor->getSettings().use_asynchronous_read_from_pool) async_reading_state = std::make_unique(); #endif } @@ -147,12 +147,12 @@ MergeTreeSource::~MergeTreeSource() = default; std::string MergeTreeSource::getName() const { - return algorithm->getName(); + return processor->getName(); } void MergeTreeSource::onCancel() { - algorithm->cancel(); + processor->cancel(); } ISource::Status MergeTreeSource::prepare() @@ -184,7 +184,7 @@ Chunk MergeTreeSource::processReadResult(ChunkAndProgress chunk) finished = chunk.is_finished; /// We can return a chunk with no rows even if are not finished. - /// This allows to report progress when all the rows are filtered out inside MergeTreeBaseSelectProcessor by PREWHERE logic. + /// This allows to report progress when all the rows are filtered out inside MergeTreeSelectProcessor by PREWHERE logic. return std::move(chunk.chunk); } @@ -200,7 +200,7 @@ std::optional MergeTreeSource::tryGenerate() chassert(async_reading_state->getStage() == AsyncReadingState::Stage::NotStarted); /// It is important to store control into job. - /// Otherwise, race between job and ~MergeTreeBaseSelectProcessor is possible. + /// Otherwise, race between job and ~MergeTreeSelectProcessor is possible. auto job = [this, control = async_reading_state->start()]() mutable { auto holder = std::move(control); @@ -208,7 +208,7 @@ std::optional MergeTreeSource::tryGenerate() try { OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; - holder->setResult(algorithm->read()); + holder->setResult(processor->read()); } catch (...) 
{ @@ -223,7 +223,7 @@ std::optional MergeTreeSource::tryGenerate() #endif OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; - return processReadResult(algorithm->read()); + return processReadResult(processor->read()); } #if defined(OS_LINUX) diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h index 463faad0fab..655f0ee6ebe 100644 --- a/src/Storages/MergeTree/MergeTreeSource.h +++ b/src/Storages/MergeTree/MergeTreeSource.h @@ -4,15 +4,15 @@ namespace DB { -class IMergeTreeSelectAlgorithm; -using MergeTreeSelectAlgorithmPtr = std::unique_ptr; +class MergeTreeSelectProcessor; +using MergeTreeSelectProcessorPtr = std::unique_ptr; struct ChunkAndProgress; class MergeTreeSource final : public ISource { public: - explicit MergeTreeSource(MergeTreeSelectAlgorithmPtr algorithm_); + explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_); ~MergeTreeSource() override; std::string getName() const override; @@ -29,7 +29,7 @@ protected: void onCancel() override; private: - MergeTreeSelectAlgorithmPtr algorithm; + MergeTreeSelectProcessorPtr processor; #if defined(OS_LINUX) struct AsyncReadingState; diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp deleted file mode 100644 index 01094d65ac5..00000000000 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - -MergeTreeThreadSelectAlgorithm::MergeTreeThreadSelectAlgorithm( - size_t thread_, - IMergeTreeReadPoolPtr pool_, - size_t min_marks_for_concurrent_read_, - size_t max_block_size_rows_, - size_t preferred_block_size_bytes_, - size_t preferred_max_column_in_block_size_bytes_, - const MergeTreeData & storage_, - const StorageSnapshotPtr & storage_snapshot_, - bool use_uncompressed_cache_, - const PrewhereInfoPtr & prewhere_info_, - const ExpressionActionsSettings & actions_settings_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_) - : IMergeTreeSelectAlgorithm{ - pool_->getHeader(), storage_, storage_snapshot_, prewhere_info_, actions_settings_, max_block_size_rows_, - preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, - reader_settings_, use_uncompressed_cache_, virt_column_names_}, - thread{thread_}, - pool{std::move(pool_)} -{ - min_marks_to_read = min_marks_for_concurrent_read_; -} - -/// Requests read task from MergeTreeReadPool and signals whether it got one -bool MergeTreeThreadSelectAlgorithm::getNewTaskImpl() -{ - task = pool->getTask(thread); - return static_cast(task); -} - - -void MergeTreeThreadSelectAlgorithm::finalizeNewTask() -{ - const std::string part_name = task->data_part->isProjectionPart() ? task->data_part->getParentPart()->name : task->data_part->name; - - /// Allows pool to reduce number of threads in case of too slow reads. - auto profile_callback = [this](ReadBufferFromFileBase::ProfileInfo info_) { pool->profileFeedback(info_); }; - const auto & metadata_snapshot = storage_snapshot->metadata; - - IMergeTreeReader::ValueSizeMap value_size_map; - - if (reader && part_name != last_read_part_name) - { - value_size_map = reader->getAvgValueSizeHints(); - } - - /// task->reader.valid() means there is a prefetched reader in this test, use it. 
- const bool init_new_readers = !reader || task->reader.valid() || part_name != last_read_part_name; - if (init_new_readers) - initializeMergeTreeReadersForCurrentTask(metadata_snapshot, value_size_map, profile_callback); - - last_read_part_name = part_name; -} - - -void MergeTreeThreadSelectAlgorithm::finish() -{ - reader.reset(); - pre_reader_for_step.clear(); -} - - -MergeTreeThreadSelectAlgorithm::~MergeTreeThreadSelectAlgorithm() = default; - -} diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h deleted file mode 100644 index 4d9c9c92daf..00000000000 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once -#include - - -namespace DB -{ - -class IMergeTreeReadPool; -using IMergeTreeReadPoolPtr = std::shared_ptr; - -/** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked - * to perform. - */ -class MergeTreeThreadSelectAlgorithm final : public IMergeTreeSelectAlgorithm -{ -public: - MergeTreeThreadSelectAlgorithm( - size_t thread_, - IMergeTreeReadPoolPtr pool_, - size_t min_marks_for_concurrent_read, - size_t max_block_size_, - size_t preferred_block_size_bytes_, - size_t preferred_max_column_in_block_size_bytes_, - const MergeTreeData & storage_, - const StorageSnapshotPtr & storage_snapshot_, - bool use_uncompressed_cache_, - const PrewhereInfoPtr & prewhere_info_, - const ExpressionActionsSettings & actions_settings_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_); - - String getName() const override { return "MergeTreeThread"; } - - ~MergeTreeThreadSelectAlgorithm() override; - -protected: - /// Requests read task from MergeTreeReadPool and signals whether it got one - bool getNewTaskImpl() override; - - void finalizeNewTask() override; - - void finish() override; - -private: - /// "thread" index (there are N threads and each thread is assigned index in interval [0..N-1]) - size_t thread; - - IMergeTreeReadPoolPtr pool; - - /// Last part read in this thread - std::string last_read_part_name; -}; - -} diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 5efb7286685..466dbb45a4d 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -226,7 +226,7 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet return false; } -void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const +void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context, std::set & pk_positions) const { auto function_node_optional = node.toFunctionNodeOrNull(); @@ -237,7 +237,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree for (size_t i = 0; i < arguments_size; ++i) { auto argument = function_node_optional->getArgumentAt(i); - analyzeImpl(res, argument, where_optimizer_context); + analyzeImpl(res, argument, where_optimizer_context, pk_positions); } } else @@ -270,6 +270,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.good = cond.viable; /// Find min position in PK of any column that is used in this condition. 
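The analyze() hunk that follows computes min_valid_pk_pos as the longest contiguous prefix of primary-key positions touched by the conditions and demotes every condition beyond that prefix. A tiny stand-alone sketch of that computation for the primary key (a, b, c) with conditions on a and c (standard library only; the variable names are illustrative):

// Illustrative only -- not part of the patch.
#include <iostream>
#include <limits>
#include <set>
#include <vector>

int main()
{
    std::set<long> pk_positions = {0, 2};           // conditions touch PK columns a and c

    long min_valid_pk_pos = -1;
    for (long pos : pk_positions)
    {
        if (pos != min_valid_pk_pos + 1)
            break;                                  // gap found: b (position 1) is missing
        min_valid_pk_pos = pos;
    }

    std::vector<long> condition_positions = {0, 2}; // per-condition min position in PK
    for (long & pos : condition_positions)
        if (pos > min_valid_pk_pos)
            pos = std::numeric_limits<long>::max() - 1;   // demote: not part of the prefix

    std::cout << "min valid PK position: " << min_valid_pk_pos << "\n";  // prints 0
    for (long pos : condition_positions)
        std::cout << pos << "\n";                   // a keeps position 0, c gets the sentinel
}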
cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + pk_positions.emplace(cond.min_position_in_primary_key); } res.emplace_back(std::move(cond)); @@ -281,7 +282,29 @@ MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const RPNBu const WhereOptimizerContext & where_optimizer_context) const { Conditions res; - analyzeImpl(res, node, where_optimizer_context); + std::set pk_positions; + analyzeImpl(res, node, where_optimizer_context, pk_positions); + + /// E.g., if the primary key is (a, b, c) but the condition is a = 1 and c = 1, + /// we should only put (a = 1) to the tail of PREWHERE, + /// and treat (c = 1) as a normal column. + if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) + { + Int64 min_valid_pk_pos = -1; + for (auto pk_pos : pk_positions) + { + if (pk_pos != min_valid_pk_pos + 1) + break; + min_valid_pk_pos = pk_pos; + } + for (auto & cond : res) + { + if (cond.min_position_in_primary_key > min_valid_pk_pos) + cond.min_position_in_primary_key = std::numeric_limits::max() - 1; + } + LOG_TRACE(log, "The min valid primary key position for moving to the tail of PREWHERE is {}", min_valid_pk_pos); + } + return res; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index fb5e84b67c6..dd9dc803f35 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -108,7 +108,7 @@ private: std::optional optimizeImpl(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; - void analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; + void analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context, std::set & pk_positions) const; /// Transform conjunctions chain in WHERE expression to Conditions list. Conditions analyze(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 39c4157a42e..b52a3a61100 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -113,7 +113,7 @@ void MergeTreeWriteAheadLog::rotate(const std::unique_lock &) MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - std::unique_lock & parts_lock, + DataPartsLock & parts_lock, bool readonly) { std::unique_lock lock(write_mutex); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index f5398a24e7d..5fb9dd907a1 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -11,6 +11,7 @@ namespace DB { class MergeTreeData; +struct DataPartsLock; /** WAL stores addditions and removals of data parts in in-memory format. 
* Format of data in WAL: @@ -64,7 +65,7 @@ public: std::vector restore( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - std::unique_lock & parts_lock, + DataPartsLock & parts_lock, bool readonly); using MinMaxBlockNumber = std::pair; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 491c36433ca..808ece8dc82 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -453,6 +453,7 @@ static ExecuteTTLType shouldExecuteTTL(const StorageMetadataPtr & metadata_snaps /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream static std::set getIndicesToRecalculate( + const MergeTreeDataPartPtr & source_part, QueryPipelineBuilder & builder, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, @@ -463,10 +464,15 @@ static std::set getIndicesToRecalculate( std::set indices_to_recalc; ASTPtr indices_recalc_expr_list = std::make_shared(); const auto & indices = metadata_snapshot->getSecondaryIndices(); + bool is_full_part_storage = isFullPartStorage(source_part->getDataPartStorage()); for (const auto & index : indices) { - if (materialized_indices.contains(index.name)) + bool need_recalculate = + materialized_indices.contains(index.name) + || (!is_full_part_storage && source_part->hasSecondaryIndex(index.name)); + + if (need_recalculate) { if (indices_to_recalc.insert(index_factory.get(index)).second) { @@ -496,15 +502,23 @@ static std::set getIndicesToRecalculate( } static std::set getProjectionsToRecalculate( + const MergeTreeDataPartPtr & source_part, const StorageMetadataPtr & metadata_snapshot, const NameSet & materialized_projections) { std::set projections_to_recalc; + bool is_full_part_storage = isFullPartStorage(source_part->getDataPartStorage()); + for (const auto & projection : metadata_snapshot->getProjections()) { - if (materialized_projections.contains(projection.name)) + bool need_recalculate = + materialized_projections.contains(projection.name) + || (!is_full_part_storage && source_part->hasProjection(projection.name)); + + if (need_recalculate) projections_to_recalc.insert(&projection); } + return projections_to_recalc; } @@ -1279,14 +1293,20 @@ private: removed_indices.insert(command.column_name); } + bool is_full_part_storage = isFullPartStorage(ctx->new_data_part->getDataPartStorage()); const auto & indices = ctx->metadata_snapshot->getSecondaryIndices(); + MergeTreeIndices skip_indices; for (const auto & idx : indices) { if (removed_indices.contains(idx.name)) continue; - if (ctx->materialized_indices.contains(idx.name)) + bool need_recalculate = + ctx->materialized_indices.contains(idx.name) + || (!is_full_part_storage && ctx->source_part->hasSecondaryIndex(idx.name)); + + if (need_recalculate) { skip_indices.push_back(MergeTreeIndexFactory::instance().get(idx)); } @@ -1319,7 +1339,11 @@ private: if (removed_projections.contains(projection.name)) continue; - if (ctx->materialized_projections.contains(projection.name)) + bool need_recalculate = + ctx->materialized_projections.contains(projection.name) + || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name)); + + if (need_recalculate) { ctx->projections_to_build.push_back(&projection); } @@ -1821,7 +1845,7 @@ bool MutateTask::prepare() .txn = ctx->txn, .hardlinked_files = &ctx->hardlinked_files, .files_to_copy_instead_of_hardlinks = std::move(files_to_copy_instead_of_hardlinks), .keep_metadata_version = true }; - 
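Editorial note on the MutateTask hunks above: the recurring `need_recalculate` predicate is the heart of this change. An index or projection is rebuilt either because the mutation materializes it explicitly, or because the source part is not stored in full (per-file) format, so its existing index/projection files cannot simply be hard-linked. A hedged sketch of that predicate, with plain strings and sets standing in for the part and metadata types:

```cpp
#include <set>
#include <string>

// Illustrative only: plain strings and sets stand in for IndexDescription,
// MergeTreeDataPartPtr and isFullPartStorage() used in MutateTask.cpp.
struct SourcePartInfo
{
    bool is_full_part_storage;                 // isFullPartStorage(part->getDataPartStorage())
    std::set<std::string> existing_indices;    // names for which part->hasSecondaryIndex() is true
};

bool needRecalculate(
    const std::string & index_name,
    const std::set<std::string> & materialized_indices,
    const SourcePartInfo & source_part)
{
    // Rebuild if the mutation asks for it explicitly, or if the part uses a
    // non-full (e.g. compact) storage format and already has this index, since
    // then the index cannot simply be hard-linked from the old part.
    return materialized_indices.contains(index_name)
        || (!source_part.is_full_part_storage && source_part.existing_indices.contains(index_name));
}
```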
auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params); + auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getWriteSettings()); part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); @@ -1841,6 +1865,7 @@ bool MutateTask::prepare() context_for_reading->setSetting("max_threads", 1); context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); + context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -1920,9 +1945,16 @@ bool MutateTask::prepare() else /// TODO: check that we modify only non-key columns in this case. { ctx->indices_to_recalc = MutationHelpers::getIndicesToRecalculate( - ctx->mutating_pipeline_builder, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices); + ctx->source_part, + ctx->mutating_pipeline_builder, + ctx->metadata_snapshot, + ctx->context, + ctx->materialized_indices); - ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(ctx->metadata_snapshot, ctx->materialized_projections); + ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( + ctx->source_part, + ctx->metadata_snapshot, + ctx->materialized_projections); ctx->files_to_skip = MutationHelpers::collectFilesToSkip( ctx->source_part, diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp deleted file mode 100644 index bb6462b3058..00000000000 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ /dev/null @@ -1,298 +0,0 @@ -#include "PartMetadataManagerWithCache.h" - -#if USE_ROCKSDB -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ - extern const Event MergeTreeMetadataCacheHit; - extern const Event MergeTreeMetadataCacheMiss; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int CORRUPTED_DATA; - extern const int NO_SUCH_PROJECTION_IN_TABLE; -} - -PartMetadataManagerWithCache::PartMetadataManagerWithCache(const IMergeTreeDataPart * part_, const MergeTreeMetadataCachePtr & cache_) - : IPartMetadataManager(part_), cache(cache_) -{ -} - -String PartMetadataManagerWithCache::getKeyFromFilePath(const String & file_path) const -{ - return part->getDataPartStorage().getDiskName() + ":" + file_path; -} - -String PartMetadataManagerWithCache::getFilePathFromKey(const String & key) const -{ - return key.substr(part->getDataPartStorage().getDiskName().size() + 1); -} - -std::unique_ptr PartMetadataManagerWithCache::read(const String & file_name) const -{ - String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; - String key = getKeyFromFilePath(file_path); - String value; - auto status = cache->get(key, value); - if (!status.ok()) - { - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheMiss); - auto in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); - std::unique_ptr reader; - if (!isCompressedFromFileName(file_name)) - reader = std::move(in); - else - reader = std::make_unique(std::move(in)); - - readStringUntilEOF(value, 
*reader); - cache->put(key, value); - } - else - { - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheHit); - } - return std::make_unique(value); -} - -bool PartMetadataManagerWithCache::exists(const String & file_name) const -{ - String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; - String key = getKeyFromFilePath(file_path); - String value; - auto status = cache->get(key, value); - if (status.ok()) - { - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheHit); - return true; - } - else - { - ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheMiss); - return part->getDataPartStorage().exists(file_name); - } -} - -void PartMetadataManagerWithCache::deleteAll(bool include_projection) -{ - Strings file_names; - part->appendFilesOfColumnsChecksumsIndexes(file_names, include_projection); - - String value; - for (const auto & file_name : file_names) - { - String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; - String key = getKeyFromFilePath(file_path); - auto status = cache->del(key); - if (!status.ok()) - { - status = cache->get(key, value); - if (status.IsNotFound()) - continue; - - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "deleteAll failed include_projection:{} status:{}, file_path:{}", - include_projection, - status.ToString(), - file_path); - } - } -} - -void PartMetadataManagerWithCache::updateAll(bool include_projection) -{ - Strings file_names; - part->appendFilesOfColumnsChecksumsIndexes(file_names, include_projection); - - String value; - String read_value; - - /// This is used to remove the keys in case of any exception while caching other keys - Strings keys_added_to_cache; - keys_added_to_cache.reserve(file_names.size()); - - try - { - for (const auto & file_name : file_names) - { - String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; - if (!part->getDataPartStorage().exists(file_name)) - continue; - auto in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); - readStringUntilEOF(value, *in); - - String key = getKeyFromFilePath(file_path); - auto status = cache->put(key, value); - if (!status.ok()) - { - status = cache->get(key, read_value); - if (status.IsNotFound() || read_value == value) - continue; - - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "updateAll failed include_projection:{} status:{}, file_path:{}", - include_projection, - status.ToString(), - file_path); - } - keys_added_to_cache.emplace_back(key); - } - } - catch (...) 
- { - for (const auto & key : keys_added_to_cache) - { - cache->del(key); - } - throw; - } -} - -void PartMetadataManagerWithCache::assertAllDeleted(bool include_projection) const -{ - Strings keys; - std::vector _; - getKeysAndCheckSums(keys, _); - if (keys.empty()) - return; - - String file_path; - String file_name; - for (const auto & key : keys) - { - file_path = getFilePathFromKey(key); - file_name = fs::path(file_path).filename(); - - /// Metadata file belongs to current part - if (fs::path(part->getDataPartStorage().getRelativePath()) / file_name == file_path) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Data part {} with type {} with meta file {} still in cache", - part->name, - part->getType().toString(), - file_path); - - /// File belongs to projection part of current part - if (!part->isProjectionPart() && include_projection) - { - const auto & projection_parts = part->getProjectionParts(); - for (const auto & [projection_name, projection_part] : projection_parts) - { - if (fs::path(part->getDataPartStorage().getRelativePath()) / (projection_name + ".proj") / file_name == file_path) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Data part {} with type {} with meta file {} with projection name {} still in cache", - part->name, - part->getType().toString(), - file_path, - projection_name); - } - } - } - } -} - -void PartMetadataManagerWithCache::getKeysAndCheckSums(Strings & keys, std::vector & checksums) const -{ - String prefix = getKeyFromFilePath(fs::path(part->getDataPartStorage().getRelativePath()) / ""); - Strings values; - cache->getByPrefix(prefix, keys, values); - size_t size = keys.size(); - for (size_t i = 0; i < size; ++i) - { - ReadBufferFromString rbuf(values[i]); - HashingReadBuffer hbuf(rbuf); - hbuf.ignoreAll(); - checksums.push_back(hbuf.getHash()); - } -} - -std::unordered_map PartMetadataManagerWithCache::check() const -{ - /// Only applies for normal part stored on disk - if (part->isProjectionPart() || !part->isStoredOnDisk()) - return {}; - - /// The directory of projection part is under the directory of its parent part - const auto filenames_without_checksums = part->getFileNamesWithoutChecksums(); - - std::unordered_map results; - Strings keys; - std::vector cache_checksums; - std::vector disk_checksums; - getKeysAndCheckSums(keys, cache_checksums); - for (size_t i = 0; i < keys.size(); ++i) - { - const auto & key = keys[i]; - String file_path = getFilePathFromKey(key); - String file_name = fs::path(file_path).filename(); - results.emplace(file_name, cache_checksums[i]); - - /// File belongs to normal part - if (fs::path(part->getDataPartStorage().getRelativePath()) / file_name == file_path) - { - auto disk_checksum = part->getActualChecksumByFile(file_name); - if (disk_checksum != cache_checksums[i]) - throw Exception( - ErrorCodes::CORRUPTED_DATA, - "Checksums doesn't match in part {} for {}. Expected: {}. 
Found {}.", - part->name, file_path, - getHexUIntUppercase(disk_checksum), - getHexUIntUppercase(cache_checksums[i])); - - disk_checksums.push_back(disk_checksum); - continue; - } - - /// File belongs to projection part - String proj_dir_name = fs::path(file_path).parent_path().filename(); - auto pos = proj_dir_name.find_last_of('.'); - if (pos == String::npos) - { - throw Exception( - ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, - "There is no projection in part: {} contains file: {} with directory name: {}", - part->name, - file_path, - proj_dir_name); - } - - String proj_name = proj_dir_name.substr(0, pos); - const auto & projection_parts = part->getProjectionParts(); - auto it = projection_parts.find(proj_name); - if (it == projection_parts.end()) - { - throw Exception( - ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, - "There is no projection {} in part: {} contains file: {}", - proj_name, part->name, file_path); - } - - auto disk_checksum = it->second->getActualChecksumByFile(file_name); - if (disk_checksum != cache_checksums[i]) - throw Exception( - ErrorCodes::CORRUPTED_DATA, - "Checksums doesn't match in projection part {} {}. Expected: {}. Found {}.", - part->name, proj_name, - getHexUIntUppercase(disk_checksum), - getHexUIntUppercase(cache_checksums[i])); - disk_checksums.push_back(disk_checksum); - } - return results; -} - -} -#endif diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.h b/src/Storages/MergeTree/PartMetadataManagerWithCache.h deleted file mode 100644 index e4505fb9462..00000000000 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_ROCKSDB -#include -#include - -namespace DB -{ - -/// PartMetadataManagerWithCache stores metadatas of part in RocksDB as cache layer to speed up -/// loading process of merge tree table. -class PartMetadataManagerWithCache : public IPartMetadataManager -{ -public: - PartMetadataManagerWithCache(const IMergeTreeDataPart * part_, const MergeTreeMetadataCachePtr & cache_); - - ~PartMetadataManagerWithCache() override = default; - - /// First read the metadata from RocksDB cache, then from disk. - std::unique_ptr read(const String & file_name) const override; - - /// First judge existence of the metadata in RocksDB cache, then in disk. - bool exists(const String & file_name) const override; - - /// Delete all metadatas in part from RocksDB cache. - void deleteAll(bool include_projection) override; - - /// Assert all metadatas in part from RocksDB cache are deleted. - void assertAllDeleted(bool include_projection) const override; - - /// Update all metadatas in part from RocksDB cache. - /// Need to be called after part directory is renamed. - void updateAll(bool include_projection) override; - - /// Check if all metadatas in part from RocksDB cache are up to date. - std::unordered_map check() const override; - -private: - /// Get cache key from path of metadata file. - /// Format: :relative/full/path/of/metadata/file - String getKeyFromFilePath(const String & file_path) const; - - /// Get metadata file path from cache key. 
- String getFilePathFromKey(const String & key) const; - - /// Get cache keys and checksums of corresponding metadata in a part(including projection parts) - void getKeysAndCheckSums(Strings & keys, std::vector & checksums) const; - - MergeTreeMetadataCachePtr cache; -}; - -} -#endif diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 3b382b7b32d..76b8080f64c 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -684,7 +684,7 @@ CancellationCode PartMovesBetweenShardsOrchestrator::killPartMoveToShard(const U continue; } else - throw Coordination::Exception(code, entry.znode_path); + throw Coordination::Exception::fromPath(code, entry.znode_path); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 07cfced8362..b72c148a4e8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -149,8 +149,7 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() /// do it under share lock cleaned_other += storage.clearOldWriteAheadLogs(); cleaned_part_like += storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); - if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); + cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index ffe3f883f80..6de121eb094 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -372,20 +372,14 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St return result; } - part->checkMetadata(); - LOG_INFO(log, "Part {} looks good.", part_name); result.status = {part_name, true, ""}; result.action = ReplicatedCheckResult::DoNothing; return result; } - catch (const Exception & e) + catch (...) { - /// Don't count the part as broken if we got known retryable exception. - /// In fact, there can be other similar situations because not all - /// of the exceptions are classified as retryable/non-retryable. But it is OK, - /// because there is a safety guard against deleting too many parts. 
- if (isRetryableException(e)) + if (isRetryableException(std::current_exception())) throw; tryLogCurrentException(log, __PRETTY_FUNCTION__); @@ -397,6 +391,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St result.status = {part_name, false, message}; result.action = ReplicatedCheckResult::TryFetchMissing; return result; + } } else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < current_time) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp index 88f4a3ec66f..24d907dbad6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp @@ -12,9 +12,7 @@ static std::array getSipHash(const String & str) { SipHash hash; hash.update(str.data(), str.size()); - std::array result; - hash.get128(result.data()); - return result; + return getSipHash128AsArray(hash); } ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2393f45ebb6..fdc82dfb730 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -77,15 +77,6 @@ void ReplicatedMergeTreeQueue::initialize(zkutil::ZooKeeperPtr zookeeper) virtual_parts.add(part_name, nullptr); } - /// Drop parts can negatively affect virtual parts. So when we load parts - /// from zookeeper we can break invariant with virtual parts. To fix this we - /// have it here. - for (const LogEntryPtr & entry : queue) - { - if (entry->isDropPart(format_version)) - virtual_parts.removePartAndCoveredParts(*entry->getDropRange(format_version)); - } - LOG_TRACE(log, "Queue initialized"); } @@ -576,7 +567,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper /// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired). /// And it's ok if replica became readonly after shutdown. /// In other cases it's likely that someone called pullLogsToQueue(...) when queue is not initialized yet by RestartingThread. 
- bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_called; + bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_prepared_called; if (not_completely_initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Tried to pull logs to queue (reason: {}) on readonly replica {}, it's a bug", reason, storage.getStorageID().getNameForLogs()); @@ -866,7 +857,7 @@ ActiveDataPartSet getPartNamesToMutate( } -void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback) +void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback) { std::lock_guard lock(update_mutations_mutex); @@ -1803,14 +1794,21 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo Int64 part_data_version = part->info.getDataVersion(); Int64 part_metadata_version = part->getMetadataVersion(); - LOG_DEBUG(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); + LOG_TEST(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); std::map result; + + bool seen_all_data_mutations = false; + bool seen_all_metadata_mutations = false; + /// Here we return mutation commands for part which has bigger alter version than part metadata version. /// Please note, we don't use getDataVersion(). It's because these alter commands are used for in-fly conversions /// of part's metadata. for (const auto & [mutation_version, mutation_status] : in_partition->second | std::views::reverse) { + if (seen_all_data_mutations && seen_all_metadata_mutations) + break; + auto alter_version = mutation_status->entry->alter_version; if (alter_version != -1) { @@ -1820,14 +1818,19 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo /// We take commands with bigger metadata version if (alter_version > part_metadata_version) result[mutation_version] = mutation_status->entry->commands; + else + seen_all_metadata_mutations = true; } - else if (mutation_version > part_data_version) + else { - result[mutation_version] = mutation_status->entry->commands; + if (mutation_version > part_data_version) + result[mutation_version] = mutation_status->entry->commands; + else + seen_all_data_mutations = true; } } - LOG_TRACE(log, "Got {} commands for part {} (part data version {}, part metadata version {})", + LOG_TEST(log, "Got {} commands for part {} (part data version {}, part metadata version {})", result.size(), part->name, part_data_version, part_metadata_version); return result; @@ -2169,7 +2172,7 @@ CommittingBlocks BaseMergePredicate::getCommitti { auto & response = locks_children[i]; if (response.error != Coordination::Error::ZOK && !partition_ids_hint) - throw Coordination::Exception(response.error, paths[i]); + throw Coordination::Exception::fromPath(response.error, paths[i]); if (response.error != Coordination::Error::ZOK) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 611866877d8..d5d85e58cb5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -335,7 +335,7 @@ public: /// Load new mutation entries. If something new is loaded, schedule storage.merge_selecting_task. 
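Editorial note on getAlterMutationCommandsForPart above: the reverse scan walks mutations newest-first and stops once neither data nor metadata mutations can match any more. A simplified, self-contained sketch of that early-exit shape (it ignores any extra bookkeeping the real method does, such as skipping killed entries):

```cpp
#include <cstdint>
#include <map>
#include <ranges>
#include <vector>

struct MutationEntry
{
    int64_t alter_version;   // -1 for data-only mutations
};

// Newest-first scan with early exit: once a metadata mutation not newer than the
// part's metadata version AND a data mutation not newer than the part's data
// version have both been seen, nothing older can match either.
std::vector<int64_t> collectPendingMutations(
    const std::map<int64_t, MutationEntry> & mutations_by_version,
    int64_t part_data_version,
    int64_t part_metadata_version)
{
    std::vector<int64_t> result;
    bool seen_all_data_mutations = false;
    bool seen_all_metadata_mutations = false;

    for (const auto & [version, entry] : mutations_by_version | std::views::reverse)
    {
        if (seen_all_data_mutations && seen_all_metadata_mutations)
            break;

        if (entry.alter_version != -1)
        {
            if (entry.alter_version > part_metadata_version)
                result.push_back(version);
            else
                seen_all_metadata_mutations = true;
        }
        else
        {
            if (version > part_data_version)
                result.push_back(version);
            else
                seen_all_data_mutations = true;
        }
    }
    return result;
}
```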
/// If watch_callback is not empty, will call it when new mutations appear in ZK. - void updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}); + void updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback = {}); /// Remove a mutation from ZooKeeper and from the local set. Returns the removed entry or nullptr /// if it could not be found. Called during KILL MUTATION query execution. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index d7166b4a3b9..79054ef46da 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -329,7 +329,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown) { - setReadonly(part_of_full_shutdown); + setReadonly(/* on_shutdown = */ part_of_full_shutdown); storage.partialShutdown(); } @@ -339,10 +339,15 @@ void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown) /// Stop restarting_thread before stopping other tasks - so that it won't restart them again. need_stop = true; task->deactivate(); + + /// Explicitly set the event, because the restarting thread will not set it again + if (part_of_full_shutdown) + storage.startup_event.set(); + LOG_TRACE(log, "Restarting thread finished"); - /// Stop other tasks. - partialShutdown(part_of_full_shutdown); + setReadonly(part_of_full_shutdown); + } void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 9e99baab4c3..02103272a1f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -25,6 +26,7 @@ public: void start(bool schedule = true) { + LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule); if (schedule) task->activateAndSchedule(); else @@ -36,6 +38,7 @@ public: void shutdown(bool part_of_full_shutdown); void run(); + private: StorageReplicatedMergeTree & storage; String log_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 0db3464a637..5b235322394 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -33,7 +34,6 @@ namespace ErrorCodes extern const int TOO_FEW_LIVE_REPLICAS; extern const int UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE; extern const int UNEXPECTED_ZOOKEEPER_ERROR; - extern const int NO_ZOOKEEPER; extern const int READONLY; extern const int UNKNOWN_STATUS_OF_INSERT; extern const int INSERT_WAS_DEDUPLICATED; @@ -49,17 +49,11 @@ namespace ErrorCodes template struct ReplicatedMergeTreeSinkImpl::DelayedChunk { - struct Partition + using BlockInfo = std::conditional_t; + struct Partition : public BlockInfo { - Poco::Logger * log; MergeTreeDataWriter::TemporaryPart temp_part; UInt64 elapsed_ns; - BlockIDsType block_id; - BlockWithPartition block_with_partition; - /// Some merging algorithms can mofidy the block which loses the information about the async insert offsets - /// when preprocessing or 
filtering data for asnyc inserts deduplication we want to use the initial, unmerged block - std::optional unmerged_block_with_partition; - std::unordered_map> block_id_to_offset_idx; ProfileEvents::Counters part_counters; Partition() = default; @@ -70,127 +64,11 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk BlockWithPartition && block_, std::optional && unmerged_block_with_partition_, ProfileEvents::Counters && part_counters_) - : log(log_), + : BlockInfo(log_, std::move(block_id_), std::move(block_), std::move(unmerged_block_with_partition_)), temp_part(std::move(temp_part_)), elapsed_ns(elapsed_ns_), - block_id(std::move(block_id_)), - block_with_partition(std::move(block_)), - unmerged_block_with_partition(std::move(unmerged_block_with_partition_)), part_counters(std::move(part_counters_)) - { - initBlockIDMap(); - } - - void initBlockIDMap() - { - if constexpr (async_insert) - { - block_id_to_offset_idx.clear(); - for (size_t i = 0; i < block_id.size(); ++i) - { - block_id_to_offset_idx[block_id[i]].push_back(i); - } - } - } - - /// this function check if the block contains duplicate inserts. - /// if so, we keep only one insert for every duplicate ones. - bool filterSelfDuplicate() - { - if constexpr (async_insert) - { - std::vector dup_block_ids; - for (const auto & [hash_id, offset_indexes] : block_id_to_offset_idx) - { - /// It means more than one inserts have the same hash id, in this case, we should keep only one of them. - if (offset_indexes.size() > 1) - dup_block_ids.push_back(hash_id); - } - if (dup_block_ids.empty()) - return false; - - filterBlockDuplicate(dup_block_ids, true); - return true; - } - return false; - } - - /// remove the conflict parts of block for rewriting again. - void filterBlockDuplicate(const std::vector & block_paths, bool self_dedup) - { - if constexpr (async_insert) - { - auto * current_block_with_partition = unmerged_block_with_partition.has_value() ? &unmerged_block_with_partition.value() : &block_with_partition; - std::vector offset_idx; - for (const auto & raw_path : block_paths) - { - std::filesystem::path p(raw_path); - String conflict_block_id = p.filename(); - auto it = block_id_to_offset_idx.find(conflict_block_id); - if (it == block_id_to_offset_idx.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown conflict path {}", conflict_block_id); - /// if this filter is for self_dedup, that means the block paths is selected by `filterSelfDuplicate`, which is a self purge. - /// in this case, we don't know if zk has this insert, then we should keep one insert, to avoid missing this insert. - offset_idx.insert(std::end(offset_idx), std::begin(it->second) + self_dedup, std::end(it->second)); - } - std::sort(offset_idx.begin(), offset_idx.end()); - - auto & offsets = current_block_with_partition->offsets; - size_t idx = 0, remove_count = 0; - auto it = offset_idx.begin(); - std::vector new_offsets; - std::vector new_block_ids; - - /// construct filter - size_t rows = current_block_with_partition->block.rows(); - auto filter_col = ColumnUInt8::create(rows, 1u); - ColumnUInt8::Container & vec = filter_col->getData(); - UInt8 * pos = vec.data(); - for (auto & offset : offsets) - { - if (it != offset_idx.end() && *it == idx) - { - size_t start_pos = idx > 0 ? 
offsets[idx - 1] : 0; - size_t end_pos = offset; - remove_count += end_pos - start_pos; - while (start_pos < end_pos) - { - *(pos + start_pos) = 0; - start_pos++; - } - it++; - } - else - { - new_offsets.push_back(offset - remove_count); - new_block_ids.push_back(block_id[idx]); - } - idx++; - } - - LOG_TRACE(log, "New block IDs: {}, new offsets: {}, size: {}", toString(new_block_ids), toString(new_offsets), new_offsets.size()); - - current_block_with_partition->offsets = std::move(new_offsets); - block_id = std::move(new_block_ids); - auto cols = current_block_with_partition->block.getColumns(); - for (auto & col : cols) - { - col = col->filter(vec, rows - remove_count); - } - current_block_with_partition->block.setColumns(cols); - - LOG_TRACE(log, "New block rows {}", current_block_with_partition->block.rows()); - - initBlockIDMap(); - - if (unmerged_block_with_partition.has_value()) - block_with_partition.block = unmerged_block_with_partition->block; - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "sync insert should not call rewriteBlock"); - } - } + {} }; DelayedChunk() = default; @@ -236,40 +114,6 @@ namespace if (size > 50) size = 50; return fmt::format("({})", fmt::join(vec.begin(), vec.begin() + size, ",")); } - - std::vector getHashesForBlocks(BlockWithPartition & block, String partition_id) - { - size_t start = 0; - auto cols = block.block.getColumns(); - std::vector block_id_vec; - for (size_t i = 0; i < block.offsets.size(); ++i) - { - size_t offset = block.offsets[i]; - std::string_view token = block.tokens[i]; - if (token.empty()) - { - SipHash hash; - for (size_t j = start; j < offset; ++j) - { - for (const auto & col : cols) - col->updateHashWithValue(j, hash); - } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); - - block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1])); - } - else - block_id_vec.push_back(partition_id + "_" + std::string(token)); - - start = offset; - } - return block_id_vec; - } } template @@ -305,81 +149,86 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( template ReplicatedMergeTreeSinkImpl::~ReplicatedMergeTreeSinkImpl() = default; -/// Allow to verify that the session in ZooKeeper is still alive. 
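Editorial note on the deleted getHashesForBlocks above (its call site now goes through AsyncInsertBlockInfo): it builds one block ID per async-insert offset range, either partition_id plus the user-provided token, or partition_id plus a hash of the rows in that range. An illustrative sketch of the scheme; std::hash over a concatenated string stands in for the 128-bit SipHash used by the real code:

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Sketch of the async-insert block-id scheme. The real code hashes the column
// values of each offset range with SipHash and joins the two 64-bit halves;
// std::hash over a concatenated string is only a stand-in here.
std::vector<std::string> makeBlockIds(
    const std::vector<std::string> & rows,      // stand-in for the block's rows
    const std::vector<size_t> & offsets,        // end offset of each sub-insert
    const std::vector<std::string> & tokens,    // optional user-provided dedup tokens
    const std::string & partition_id)
{
    std::vector<std::string> block_ids;
    size_t start = 0;
    for (size_t i = 0; i < offsets.size(); ++i)
    {
        if (!tokens[i].empty())
        {
            // Explicit token: block id is partition_id + "_" + token.
            block_ids.push_back(partition_id + "_" + tokens[i]);
        }
        else
        {
            // No token: derive the id from the rows of this sub-insert.
            std::string concatenated;
            for (size_t j = start; j < offsets[i]; ++j)
                concatenated += rows[j];
            uint64_t hash = std::hash<std::string>{}(concatenated);
            block_ids.push_back(partition_id + "_" + std::to_string(hash));
        }
        start = offsets[i];
    }
    return block_ids;
}
```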
-static void assertSessionIsNotExpired(const zkutil::ZooKeeperPtr & zookeeper) -{ - if (!zookeeper) - throw Exception(ErrorCodes::NO_ZOOKEEPER, "No ZooKeeper session."); - - if (zookeeper->expired()) - throw Exception(ErrorCodes::NO_ZOOKEEPER, "ZooKeeper session has been expired."); -} - template size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const ZooKeeperWithFaultInjectionPtr & zookeeper) { if (!isQuorumEnabled()) return 0; - quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; + size_t replicas_number = 0; - Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); + ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); + quorum_retries_ctl.retryLoop( + [&]() + { + zookeeper->setKeeper(storage.getZooKeeper()); - Strings exists_paths; - exists_paths.reserve(replicas.size()); - for (const auto & replica : replicas) - if (replica != storage.replica_name) - exists_paths.emplace_back(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active"); + quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; - auto exists_result = zookeeper->exists(exists_paths); - auto get_results = zookeeper->get(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); + Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); - Coordination::Error keeper_error = Coordination::Error::ZOK; - size_t active_replicas = 1; /// Assume current replica is active (will check below) - for (size_t i = 0; i < exists_paths.size(); ++i) - { - auto error = exists_result[i].error; - if (error == Coordination::Error::ZOK) - ++active_replicas; - else if (Coordination::isHardwareError(error)) - keeper_error = error; - } + Strings exists_paths; + exists_paths.reserve(replicas.size()); + for (const auto & replica : replicas) + if (replica != storage.replica_name) + exists_paths.emplace_back(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active"); - size_t replicas_number = replicas.size(); - size_t quorum_size = getQuorumSize(replicas_number); + auto exists_result = zookeeper->exists(exists_paths); + auto get_results = zookeeper->get(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); - if (active_replicas < quorum_size) - { - if (Coordination::isHardwareError(keeper_error)) - throw Coordination::Exception("Failed to check number of alive replicas", keeper_error); + Coordination::Error keeper_error = Coordination::Error::ZOK; + size_t active_replicas = 1; /// Assume current replica is active (will check below) + for (size_t i = 0; i < exists_paths.size(); ++i) + { + auto error = exists_result[i].error; + if (error == Coordination::Error::ZOK) + ++active_replicas; + else if (Coordination::isHardwareError(error)) + keeper_error = error; + } - throw Exception(ErrorCodes::TOO_FEW_LIVE_REPLICAS, "Number of alive replicas ({}) is less than requested quorum ({}/{}).", - active_replicas, quorum_size, replicas_number); - } + replicas_number = replicas.size(); + size_t quorum_size = getQuorumSize(replicas_number); - /** Is there a quorum for the last part for which a quorum is needed? - * Write of all the parts with the included quorum is linearly ordered. - * This means that at any time there can be only one part, - * for which you need, but not yet reach the quorum. - * Information about this part will be located in `/quorum/status` node. 
- * If the quorum is reached, then the node is deleted. - */ + if (active_replicas < quorum_size) + { + if (Coordination::isHardwareError(keeper_error)) + throw Coordination::Exception::fromMessage(keeper_error, "Failed to check number of alive replicas"); - String quorum_status; - if (!quorum_parallel && zookeeper->tryGet(quorum_info.status_path, quorum_status)) - throw Exception(ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE, - "Quorum for previous write has not been satisfied yet. Status: {}", quorum_status); + throw Exception( + ErrorCodes::TOO_FEW_LIVE_REPLICAS, + "Number of alive replicas ({}) is less than requested quorum ({}/{}).", + active_replicas, + quorum_size, + replicas_number); + } - /// Both checks are implicitly made also later (otherwise there would be a race condition). + /** Is there a quorum for the last part for which a quorum is needed? + * Write of all the parts with the included quorum is linearly ordered. + * This means that at any time there can be only one part, + * for which you need, but not yet reach the quorum. + * Information about this part will be located in `/quorum/status` node. + * If the quorum is reached, then the node is deleted. + */ - auto is_active = get_results[0]; - auto host = get_results[1]; + String quorum_status; + if (!quorum_parallel && zookeeper->tryGet(quorum_info.status_path, quorum_status)) + throw Exception( + ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE, + "Quorum for previous write has not been satisfied yet. Status: {}", + quorum_status); - if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) - throw Exception(ErrorCodes::READONLY, "Replica is not active right now"); + /// Both checks are implicitly made also later (otherwise there would be a race condition). - quorum_info.is_active_node_version = is_active.stat.version; - quorum_info.host_node_version = host.stat.version; + auto is_active = get_results[0]; + auto host = get_results[1]; + + if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) + throw Exception(ErrorCodes::READONLY, "Replica is not active right now"); + + quorum_info.is_active_node_version = is_active.stat.version; + quorum_info.host_node_version = host.stat.version; + }); return replicas_number; } @@ -412,14 +261,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) * And also check that during the insertion, the replica was not reinitialized or disabled (by the value of `is_active` node). * TODO Too complex logic, you can do better. */ - size_t replicas_num = 0; - ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); - quorum_retries_ctl.retryLoop( - [&]() - { - zookeeper->setKeeper(storage.getZooKeeper()); - replicas_num = checkQuorumPrecondition(zookeeper); - }); + size_t replicas_num = checkQuorumPrecondition(zookeeper); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -475,7 +317,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - block_id = getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id); + block_id = AsyncInsertBlockInfo::getHashesForBlocks(unmerged_block.has_value() ? 
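Editorial note on the hunk above: checkQuorumPrecondition is now wrapped in a ZooKeeperRetriesControl::retryLoop that re-acquires the Keeper session at the start of every attempt. A generic, self-contained sketch of that retry pattern; it does not model the real class's cancellation checks or backoff policy, and std::runtime_error stands in for a retryable Keeper error:

```cpp
#include <chrono>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <thread>

// Generic retry loop: re-run the whole body on a transient failure,
// refreshing any session state inside the body itself.
void retryLoop(size_t max_attempts, const std::function<void()> & body)
{
    for (size_t attempt = 1; ; ++attempt)
    {
        try
        {
            body();        // e.g. refresh the session, then do the Keeper calls
            return;
        }
        catch (const std::runtime_error & e)   // stand-in for a retryable Keeper error
        {
            if (attempt >= max_attempts)
                throw;
            std::cerr << "attempt " << attempt << " failed: " << e.what() << ", retrying\n";
            std::this_thread::sleep_for(std::chrono::milliseconds(100 * attempt));
        }
    }
}

int main()
{
    int calls = 0;
    retryLoop(3, [&]
    {
        if (++calls < 3)
            throw std::runtime_error("session expired");   // transient, retried
        std::cout << "succeeded on attempt " << calls << '\n';
    });
}
```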
*unmerged_block : current_block, temp_part.part->info.partition_id); LOG_TRACE(log, "async insert part, part id {}, block id {}, offsets {}, size {}", temp_part.part->info.partition_id, toString(block_id), toString(current_block.offsets), current_block.offsets.size()); } else @@ -633,12 +475,11 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa delayed_chunk.reset(); } -template -void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) +template<> +bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) { /// NOTE: No delay in this case. That's Ok. auto origin_zookeeper = storage.getZooKeeper(); - assertSessionIsNotExpired(origin_zookeeper); auto zookeeper = std::make_shared(origin_zookeeper); size_t replicas_num = checkQuorumPrecondition(zookeeper); @@ -646,14 +487,41 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: Stopwatch watch; ProfileEventsScope profile_events_scope; + String original_part_dir = part->getDataPartStorage().getPartDirectory(); + auto try_rollback_part_rename = [this, &part, &original_part_dir]() + { + if (original_part_dir == part->getDataPartStorage().getPartDirectory()) + return; + + if (part->new_part_was_committed_to_zookeeper_after_rename_on_disk) + return; + + /// Probably we have renamed the part on disk, but then failed to commit it to ZK. + /// We should rename it back, otherwise it will be lost (e.g. if it was a part from detached/ and we failed to attach it). + try + { + part->renameTo(original_part_dir, /*remove_new_dir_if_exists*/ false); + } + catch (...) + { + tryLogCurrentException(log); + } + }; + try { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - commitPart(zookeeper, part, BlockIDsType(), replicas_num, true); - PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot())); + String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; + bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; + + /// Set a special error code if the block is duplicate + int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; + PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus(error)); + return deduplicated; } catch (...) 
{ + try_rollback_part_rename(); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus::fromCurrentException("", true)); throw; } @@ -1001,6 +869,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: Coordination::Error multi_code = zookeeper->tryMultiNoThrow(ops, responses); /// 1 RTT if (multi_code == Coordination::Error::ZOK) { + part->new_part_was_committed_to_zookeeper_after_rename_on_disk = true; transaction.commit(); storage.merge_selecting_task->schedule(); @@ -1185,7 +1054,6 @@ template void ReplicatedMergeTreeSinkImpl::onFinish() { auto zookeeper = storage.getZooKeeper(); - assertSessionIsNotExpired(zookeeper); finishDelayedChunk(std::make_shared(zookeeper)); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 868590efa25..4a192a822f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -56,7 +56,7 @@ public: String getName() const override { return "ReplicatedMergeTreeSink"; } /// For ATTACHing existing data on filesystem. - void writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); /// For proper deduplication in MaterializedViews bool lastBlockIsDuplicate() const override diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index 512c0800de7..e46c3f974c7 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -159,7 +159,7 @@ public: void setKeeperError(Coordination::Error code, std::string message) { - setKeeperError(std::make_exception_ptr(zkutil::KeeperException(message, code)), code, message); + setKeeperError(std::make_exception_ptr(zkutil::KeeperException::createDeprecated(message, code)), code, message); } template diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 1967357a840..c493b24ebc9 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -15,7 +15,11 @@ #include #include #include +#include +#if USE_AZURE_BLOB_STORAGE +#include +#endif namespace CurrentMetrics { @@ -50,19 +54,41 @@ bool isNotEnoughMemoryErrorCode(int code) || code == ErrorCodes::CANNOT_MREMAP; } -bool isRetryableException(const Exception & e) +bool isRetryableException(const std::exception_ptr exception_ptr) { - if (isNotEnoughMemoryErrorCode(e.code())) - return true; - - if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT) - return true; - + try + { + rethrow_exception(exception_ptr); + } #if USE_AWS_S3 - const auto * s3_exception = dynamic_cast(&e); - if (s3_exception && s3_exception->isRetryableError()) - return true; + catch (const S3Exception & s3_exception) + { + if (s3_exception.isRetryableError()) + return true; + } #endif +#if USE_AZURE_BLOB_STORAGE + catch (const Azure::Core::RequestFailedException &) + { + return true; + } +#endif + catch (const Exception & e) + { + if (isNotEnoughMemoryErrorCode(e.code())) + return true; + + if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT) + return true; + } + catch (const Poco::Net::NetException &) + { + return true; + } + catch (const Poco::TimeoutException &) + { + return true; + } /// In fact, there can be other similar situations. 
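Editorial note on writeExistingPart above: it adds a rollback guard so that if the part was renamed on disk but the commit to ZooKeeper never happened, the part is renamed back and a failed ATTACH does not lose it from detached/. A toy sketch of that guard, with a plain struct standing in for the data part and its storage:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Toy stand-in for the data part: only the directory name and the commit flag matter here.
struct FakePart
{
    std::string dir;
    bool committed_to_zookeeper = false;
    void renameTo(const std::string & new_dir) { dir = new_dir; }
};

int main()
{
    FakePart part{"detached/attaching_202308_1_1_0"};
    const std::string original_dir = part.dir;

    // Same shape as try_rollback_part_rename: undo the rename unless the commit succeeded.
    auto try_rollback_rename = [&]()
    {
        if (part.dir == original_dir)
            return;                       // nothing was renamed yet
        if (part.committed_to_zookeeper)
            return;                       // committed: keep the new name
        part.renameTo(original_dir);      // roll back so the part is not lost
    };

    const bool zk_commit_succeeds = false;    // simulate a failed ZooKeeper commit
    try
    {
        part.renameTo("202308_1_1_0");
        if (!zk_commit_succeeds)
            throw std::runtime_error("ZooKeeper commit failed");
        part.committed_to_zookeeper = true;
    }
    catch (...)
    {
        try_rollback_rename();
    }

    std::cout << part.dir << '\n';   // prints the original detached/ name
}
```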
/// But it is OK, because there is a safety guard against deleting too many parts. @@ -201,20 +227,20 @@ static IMergeTreeDataPart::Checksums checkDataPart( continue; auto checksum_it = checksums_data.files.find(file_name); - /// Skip files that we already calculated. Also skip metadata files that are not checksummed. if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name)) { auto txt_checksum_it = checksums_txt_files.find(file_name); - if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0) + if ((txt_checksum_it != checksums_txt_files.end() && txt_checksum_it->second.is_compressed)) + { + /// If we have both compressed and uncompressed in txt or its .cmrk(2/3) or .cidx, then calculate them + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + } + else { /// The file is not compressed. checksum_file(file_name); } - else /// If we have both compressed and uncompressed in txt, then calculate them - { - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - } } } @@ -322,15 +348,10 @@ IMergeTreeDataPart::Checksums checkDataPart( require_checksums, is_cancelled); } - catch (const Exception & e) - { - if (isRetryableException(e)) - throw; - - return drop_cache_and_check(); - } catch (...) { + if (isRetryableException(std::current_exception())) + throw; return drop_cache_and_check(); } } diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h index 20ddecad3ed..d0e48b6f80a 100644 --- a/src/Storages/MergeTree/checkDataPart.h +++ b/src/Storages/MergeTree/checkDataPart.h @@ -13,6 +13,6 @@ IMergeTreeDataPart::Checksums checkDataPart( std::function is_cancelled = []{ return false; }); bool isNotEnoughMemoryErrorCode(int code); -bool isRetryableException(const Exception & e); +bool isRetryableException(const std::exception_ptr exception_ptr); } diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 6faacf3c066..4c645a8628e 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -17,9 +17,15 @@ namespace { void localBackupImpl( - const DiskPtr & disk, IDiskTransaction * transaction, const String & source_path, - const String & destination_path, bool make_source_readonly, size_t level, - std::optional max_level, bool copy_instead_of_hardlinks, + const DiskPtr & disk, + IDiskTransaction * transaction, + const String & source_path, + const String & destination_path, + const WriteSettings & settings, + bool make_source_readonly, + size_t level, + std::optional max_level, + bool copy_instead_of_hardlinks, const NameSet & files_to_copy_instead_of_hardlinks) { if (max_level && level > *max_level) @@ -51,11 +57,11 @@ void localBackupImpl( { if (transaction) { - transaction->copyFile(source, destination); + transaction->copyFile(source, destination, settings); } else { - disk->copyFile(source, *disk, destination); + disk->copyFile(source, *disk, destination, settings); } } else @@ -69,8 +75,16 @@ void localBackupImpl( else { localBackupImpl( - disk, transaction, source, destination, make_source_readonly, level + 1, max_level, - copy_instead_of_hardlinks, files_to_copy_instead_of_hardlinks); + disk, + transaction, + source, + destination, + settings, + make_source_readonly, + level + 1, + max_level, + copy_instead_of_hardlinks, + files_to_copy_instead_of_hardlinks); } } } @@ -112,9 +126,15 @@ private: } void localBackup( - const 
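Editorial note on the checkDataPart changes above: isRetryableException now takes an std::exception_ptr, so call sites can simply write `catch (...) { if (isRetryableException(std::current_exception())) throw; ... }`. The classification works by rethrowing the pointer and dispatching on catch clauses; a minimal version of that idiom with stand-in exception types (the real function matches S3, Azure and Poco network/timeout exceptions, plus memory-related error codes):

```cpp
#include <exception>
#include <stdexcept>

// Stand-ins for the network/storage exception types matched in checkDataPart.cpp.
struct RetryableNetworkError : std::runtime_error { using std::runtime_error::runtime_error; };
struct TimeoutError : std::runtime_error { using std::runtime_error::runtime_error; };

// Classify an exception_ptr by rethrowing it and dispatching on catch clauses,
// the same shape as the reworked isRetryableException(std::exception_ptr).
bool isRetryable(std::exception_ptr eptr)
{
    try
    {
        std::rethrow_exception(eptr);
    }
    catch (const RetryableNetworkError &)
    {
        return true;
    }
    catch (const TimeoutError &)
    {
        return true;
    }
    catch (...)
    {
        return false;   // everything else counts as a real problem with the part
    }
}

// Typical call-site shape after this change:
//   catch (...)
//   {
//       if (isRetryable(std::current_exception()))
//           throw;                 // transient: do not mark the part as broken
//       /* ... report the part as broken ... */
//   }
```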
DiskPtr & disk, const String & source_path, - const String & destination_path, bool make_source_readonly, - std::optional max_level, bool copy_instead_of_hardlinks, const NameSet & files_to_copy_intead_of_hardlinks, DiskTransactionPtr disk_transaction) + const DiskPtr & disk, + const String & source_path, + const String & destination_path, + const WriteSettings & settings, + bool make_source_readonly, + std::optional max_level, + bool copy_instead_of_hardlinks, + const NameSet & files_to_copy_intead_of_hardlinks, + DiskTransactionPtr disk_transaction) { if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) { @@ -125,9 +145,6 @@ void localBackup( size_t try_no = 0; const size_t max_tries = 10; - CleanupOnFail cleanup(disk_transaction ? std::function([]{}) : - [disk, destination_path]() { disk->removeRecursive(destination_path); }); - /** Files in the directory can be permanently added and deleted. * If some file is deleted during an attempt to make a backup, then try again, * because it's important to take into account any new files that might appear. @@ -136,10 +153,50 @@ void localBackup( { try { - if (copy_instead_of_hardlinks && !disk_transaction) - disk->copyDirectoryContent(source_path, disk, destination_path); + if (disk_transaction) + { + localBackupImpl( + disk, + disk_transaction.get(), + source_path, + destination_path, + settings, + make_source_readonly, + /* level= */ 0, + max_level, + copy_instead_of_hardlinks, + files_to_copy_intead_of_hardlinks); + } + else if (copy_instead_of_hardlinks) + { + CleanupOnFail cleanup([disk, destination_path]() { disk->removeRecursive(destination_path); }); + disk->copyDirectoryContent(source_path, disk, destination_path, settings); + cleanup.success(); + } else - localBackupImpl(disk, disk_transaction.get(), source_path, destination_path, make_source_readonly, 0, max_level, copy_instead_of_hardlinks, files_to_copy_intead_of_hardlinks); + { + std::function cleaner; + if (disk->supportZeroCopyReplication()) + /// Note: this code will create garbage on s3. We should always remove `copy_instead_of_hardlinks` files. + /// The third argument should be a list of exceptions, but (looks like) it is ignored for keep_all_shared_data = true. + cleaner = [disk, destination_path]() { disk->removeSharedRecursive(destination_path, /*keep_all_shared_data*/ true, {}); }; + else + cleaner = [disk, destination_path]() { disk->removeRecursive(destination_path); }; + + CleanupOnFail cleanup(std::move(cleaner)); + localBackupImpl( + disk, + disk_transaction.get(), + source_path, + destination_path, + settings, + make_source_readonly, + /* level= */ 0, + max_level, + /* copy_instead_of_hardlinks= */ false, + files_to_copy_intead_of_hardlinks); + cleanup.success(); + } } catch (const DB::ErrnoException & e) { @@ -166,8 +223,6 @@ void localBackup( break; } - - cleanup.success(); } } diff --git a/src/Storages/MergeTree/localBackup.h b/src/Storages/MergeTree/localBackup.h index 89906bf1d75..d9b7f3e8b0c 100644 --- a/src/Storages/MergeTree/localBackup.h +++ b/src/Storages/MergeTree/localBackup.h @@ -7,6 +7,8 @@ namespace DB { +struct WriteSettings; + /** Creates a local (at the same mount point) backup (snapshot) directory. * * In the specified destination directory, it creates hard links on all source-directory files @@ -22,6 +24,15 @@ namespace DB * * If `transaction` is provided, the changes will be added to it instead of performend on disk. 
*/ - void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, bool make_source_readonly = true, std::optional max_level = {}, bool copy_instead_of_hardlinks = false, const NameSet & files_to_copy_intead_of_hardlinks = {}, DiskTransactionPtr disk_transaction = nullptr); + void localBackup( + const DiskPtr & disk, + const String & source_path, + const String & destination_path, + const WriteSettings & settings, + bool make_source_readonly = true, + std::optional max_level = {}, + bool copy_instead_of_hardlinks = false, + const NameSet & files_to_copy_intead_of_hardlinks = {}, + DiskTransactionPtr disk_transaction = nullptr); } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 75f1542e30e..0a182789311 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -80,6 +80,7 @@ ORDER BY expr [SAMPLE BY expr] [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] [SETTINGS name=value, ...] +[COMMENT 'comment'] See details in documentation: https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/. Other engines of the family support different syntax, see details in the corresponding documentation topics. diff --git a/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp b/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp deleted file mode 100644 index d2b7561749d..00000000000 --- a/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "config.h" - -#if USE_ROCKSDB -#include -#include -#include -#include -#include - -using namespace DB; - -class MergeTreeMetadataCacheTest : public ::testing::Test -{ -public: - void SetUp() override - { - cache = MergeTreeMetadataCache::create("./db/", 268435456); - } - - void TearDown() override - { - cache->shutdown(); - cache.reset(); - } - - MergeTreeMetadataCachePtr cache; -}; - -TEST_F(MergeTreeMetadataCacheTest, testCommon) -{ - std::vector files - = {"columns.txt", "checksums.txt", "primary.idx", "count.txt", "partition.dat", "minmax_p.idx", "default_compression_codec.txt"}; - String prefix = "data/test_metadata_cache/check_part_metadata_cache/201806_1_1_0_4/"; - - for (const auto & file : files) - { - auto status = cache->put(prefix + file, prefix + file); - ASSERT_EQ(status.code(), rocksdb::Status::Code::kOk); - } - - for (const auto & file : files) - { - String value; - auto status = cache->get(prefix + file, value); - ASSERT_EQ(status.code(), rocksdb::Status::Code::kOk); - ASSERT_EQ(value, prefix + file); - } - - { - Strings keys; - Strings values; - cache->getByPrefix(prefix, keys, values); - ASSERT_EQ(keys.size(), files.size()); - ASSERT_EQ(values.size(), files.size()); - for (size_t i = 0; i < files.size(); ++i) - { - ASSERT_EQ(values[i], keys[i]); - } - } - - for (const auto & file : files) - { - auto status = cache->del(prefix + file); - ASSERT_EQ(status.code(), rocksdb::Status::Code::kOk); - } - - for (const auto & file : files) - { - String value; - auto status = cache->get(prefix + file, value); - ASSERT_EQ(status.code(), rocksdb::Status::Code::kNotFound); - } - - { - Strings keys; - Strings values; - cache->getByPrefix(prefix, keys, values); - ASSERT_EQ(keys.size(), 0); - ASSERT_EQ(values.size(), 0); - } -} - -#endif diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 5ef0cfda1be..dff77c629c1 100644 --- 
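Editorial note on the localBackup changes above: the function now threads WriteSettings through every copy and scopes a CleanupOnFail guard per strategy (transaction, plain copy, or hard links with zero-copy-aware removal) instead of around the whole retry loop. A minimal RAII sketch in the spirit of that guard; the directory name is made up for the example:

```cpp
#include <filesystem>
#include <functional>
#include <iostream>

// Minimal RAII guard in the spirit of CleanupOnFail in localBackup.cpp:
// run the cleaner on destruction unless success() was called first.
class CleanupOnFail
{
public:
    explicit CleanupOnFail(std::function<void()> cleaner_) : cleaner(std::move(cleaner_)) {}

    void success() { clean = false; }

    ~CleanupOnFail()
    {
        if (clean)
        {
            try { cleaner(); } catch (...) { /* never throw from a destructor */ }
        }
    }

private:
    std::function<void()> cleaner;
    bool clean = true;
};

int main()
{
    namespace fs = std::filesystem;
    fs::create_directory("backup_tmp");   // hypothetical destination directory

    {
        // If anything between constructing the guard and success() throws,
        // the half-written destination is removed.
        CleanupOnFail cleanup([] { std::filesystem::remove_all("backup_tmp"); });
        // ... copy or hard-link files into backup_tmp ...
        cleanup.success();
    }

    std::cout << fs::exists("backup_tmp") << '\n';   // 1: kept, because success() ran
    fs::remove_all("backup_tmp");
}
```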
a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -21,7 +21,7 @@ class ReadBuffer; /// to values from set of columns which satisfy predicate. struct MutationCommand { - ASTPtr ast; /// The AST of the whole command + ASTPtr ast = {}; /// The AST of the whole command enum Type { @@ -43,27 +43,27 @@ struct MutationCommand Type type = EMPTY; /// WHERE part of mutation - ASTPtr predicate; + ASTPtr predicate = {}; /// Columns with corresponding actions - std::unordered_map column_to_update_expression; + std::unordered_map column_to_update_expression = {}; /// For MATERIALIZE INDEX and PROJECTION - String index_name; - String projection_name; + String index_name = {}; + String projection_name = {}; /// For MATERIALIZE INDEX, UPDATE and DELETE. - ASTPtr partition; + ASTPtr partition = {}; /// For reads, drops and etc. - String column_name; - DataTypePtr data_type; /// Maybe empty if we just want to drop column + String column_name = {}; + DataTypePtr data_type = {}; /// Maybe empty if we just want to drop column /// We need just clear column, not drop from metadata. bool clear = false; /// Column rename_to - String rename_to; + String rename_to = {}; /// If parse_alter_commands, than consider more Alter commands as mutation commands static std::optional parse(ASTAlterCommand * command, bool parse_alter_commands = false); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index efe54243ee9..cc7b0d88be5 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -38,7 +38,7 @@ public: /// actions require an open connection. Therefore there needs to be a way inside shutdown() method to know whether it is called /// because of drop query. And drop() method is not suitable at all, because it will not only require to reopen connection, but also /// it can be called considerable time after table is dropped (for example, in case of Atomic database), which is not appropriate for the case. - void checkTableCanBeDropped() const override { drop_table = true; } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { drop_table = true; } /// Always return virtual columns in addition to required columns void read( diff --git a/src/Storages/PartitionCommands.h b/src/Storages/PartitionCommands.h index 4921cf8e53b..b8b2ec47e71 100644 --- a/src/Storages/PartitionCommands.h +++ b/src/Storages/PartitionCommands.h @@ -80,7 +80,7 @@ struct PartitionCommand using PartitionCommands = std::vector; -/// Result of exectuin of a single partition commands. Partition commands quite +/// Result of executing of a single partition commands. Partition commands quite /// different, so some fields will be empty for some commands. Currently used in /// ATTACH and FREEZE commands. 
struct PartitionCommandResultInfo @@ -92,14 +92,14 @@ struct PartitionCommandResultInfo /// Part name, always filled String part_name; /// Part name in /detached directory, filled in ATTACH - String old_part_name; + String old_part_name = {}; /// Absolute path to backup directory, filled in FREEZE - String backup_path; + String backup_path = {}; /// Absolute path part backup, filled in FREEZE - String part_backup_path; + String part_backup_path = {}; /// Name of the backup (specified by user or increment value), filled in /// FREEZE - String backup_name; + String backup_name = {}; }; using PartitionCommandsResultInfo = std::vector; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 48825361a16..cddf252a7e1 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -356,9 +356,8 @@ const ProjectionDescription & ProjectionsDescription::get(const String & project auto it = map.find(projection_name); if (it == map.end()) { - String exception_message = fmt::format("There is no projection {} in table", projection_name); - appendHintsMessage(exception_message, projection_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, "There is no projection {} in table{}", + projection_name, getHintsMessage(projection_name)); } return *(it->second); @@ -401,9 +400,8 @@ void ProjectionsDescription::remove(const String & projection_name, bool if_exis if (if_exists) return; - String exception_message = fmt::format("There is no projection {} in table", projection_name); - appendHintsMessage(exception_message, projection_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, "There is no projection {} in table{}", + projection_name, getHintsMessage(projection_name)); } projections.erase(it->second); diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index c48942eb0ec..30af80d6d85 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -106,7 +106,7 @@ struct ProjectionDescription using ProjectionDescriptionRawPtr = const ProjectionDescription *; /// All projections in storage -struct ProjectionsDescription : public IHints<1, ProjectionsDescription> +struct ProjectionsDescription : public IHints<> { ProjectionsDescription() = default; ProjectionsDescription(ProjectionsDescription && other) = default; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index dc410c4f298..2b40c88ba6e 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -41,7 +41,7 @@ public: /// actions require an open connection. Therefore there needs to be a way inside shutdown() method to know whether it is called /// because of drop query. And drop() method is not suitable at all, because it will not only require to reopen connection, but also /// it can be called considerable time after table is dropped (for example, in case of Atomic database), which is not appropriate for the case. 
- void checkTableCanBeDropped() const override { drop_table = true; } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { drop_table = true; } /// Always return virtual columns in addition to required columns void read( diff --git a/src/Storages/ReadFromStorageProgress.cpp b/src/Storages/ReadFromStorageProgress.cpp deleted file mode 100644 index 8ad1cf92209..00000000000 --- a/src/Storages/ReadFromStorageProgress.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max) -{ - if (!total_result_size) - return; - - if (!num_rows) - return; - - const auto progress = source.getReadProgress(); - if (progress && !progress->limits.empty()) - { - for (const auto & limit : progress->limits) - { - if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes - || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes) - return; - } - } - - const auto bytes_per_row = std::ceil(static_cast(chunk_bytes_size) / num_rows); - size_t total_rows_approx = static_cast(std::ceil(static_cast(total_result_size) / bytes_per_row)); - total_rows_approx_accumulated += total_rows_approx; - ++total_rows_count_times; - total_rows_approx = total_rows_approx_accumulated / total_rows_count_times; - - /// We need to add diff, because total_rows_approx is incremental value. - /// It would be more correct to send total_rows_approx as is (not a diff), - /// but incrementation of total_rows_to_read does not allow that. - /// A new counter can be introduced for that to be sent to client, but it does not worth it. 
- if (total_rows_approx > total_rows_approx_max) - { - size_t diff = total_rows_approx - total_rows_approx_max; - source.addTotalRowsApprox(diff); - total_rows_approx_max = total_rows_approx; - } -} - -} diff --git a/src/Storages/ReadFromStorageProgress.h b/src/Storages/ReadFromStorageProgress.h deleted file mode 100644 index 2be37d26fee..00000000000 --- a/src/Storages/ReadFromStorageProgress.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class ISource; - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max); - -} diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 27e8de78b0f..be322a402ee 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -304,6 +304,12 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt } } +void StorageEmbeddedRocksDB::drop() +{ + rocksdb_ptr->Close(); + rocksdb_ptr = nullptr; +} + void StorageEmbeddedRocksDB::initDB() { rocksdb::Status status; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 97fd07626a8..d0cf05f261c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -53,6 +53,7 @@ public: void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override; void mutate(const MutationCommands &, ContextPtr) override; + void drop() override; bool supportsParallelInsert() const override { return true; } bool supportsIndexForIn() const override { return true; } diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp new file mode 100644 index 00000000000..4624566a517 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp @@ -0,0 +1,353 @@ +#include "IO/VarInt.h" +#include "config.h" + +#if USE_AWS_S3 +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TIMEOUT_EXCEEDED; +} + +namespace +{ + UInt64 getCurrentTime() + { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + } +} + +void S3QueueFilesMetadata::S3QueueCollection::read(ReadBuffer & in) +{ + files = {}; + if (in.eof()) + return; + + size_t files_num; + in >> files_num >> "\n"; + while (files_num--) + { + TrackedCollectionItem item; + in >> item.file_path >> "\n"; + in >> item.timestamp >> "\n"; + in >> item.retries_count >> "\n"; + in >> item.last_exception >> "\n"; + files.push_back(item); + } +} + +void S3QueueFilesMetadata::S3QueueCollection::write(WriteBuffer & out) const +{ + out << files.size() << "\n"; + for (const auto & processed_file : files) + { + out << processed_file.file_path << "\n"; + out << processed_file.timestamp << "\n"; + out << processed_file.retries_count << "\n"; + out << processed_file.last_exception << "\n"; + } +} + +String S3QueueFilesMetadata::S3QueueCollection::toString() const +{ + WriteBufferFromOwnString out; + write(out); + return out.str(); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::S3QueueCollection::getFileNames() +{ + S3FilesCollection keys = {}; + for (const auto & pair : files) + 
keys.insert(pair.file_path); + return keys; +} + + +S3QueueFilesMetadata::S3QueueProcessedCollection::S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_) + : max_size(max_size_), max_age(max_age_) +{ +} + +void S3QueueFilesMetadata::S3QueueProcessedCollection::parse(const String & collection_str) +{ + ReadBufferFromString buf(collection_str); + read(buf); + if (max_age > 0) // Remove old items + { + std::erase_if( + files, + [timestamp = getCurrentTime(), this](const TrackedCollectionItem & processed_file) + { return (timestamp - processed_file.timestamp) > max_age; }); + } +} + + +void S3QueueFilesMetadata::S3QueueProcessedCollection::add(const String & file_name) +{ + TrackedCollectionItem processed_file; + processed_file.file_path = file_name; + processed_file.timestamp = getCurrentTime(); + files.push_back(processed_file); + + /// TODO: it is strange that in parse() we take into account only max_age, but here only max_size. + while (files.size() > max_size) + { + files.pop_front(); + } +} + + +S3QueueFilesMetadata::S3QueueFailedCollection::S3QueueFailedCollection(const UInt64 & max_retries_count_) + : max_retries_count(max_retries_count_) +{ +} + +void S3QueueFilesMetadata::S3QueueFailedCollection::parse(const String & collection_str) +{ + ReadBufferFromString buf(collection_str); + read(buf); +} + + +bool S3QueueFilesMetadata::S3QueueFailedCollection::add(const String & file_name, const String & exception_message) +{ + auto failed_it = std::find_if( + files.begin(), files.end(), + [&file_name](const TrackedCollectionItem & s) { return s.file_path == file_name; }); + + if (failed_it == files.end()) + { + files.emplace_back(file_name, 0, max_retries_count, exception_message); + } + else if (failed_it->retries_count == 0 || --failed_it->retries_count == 0) + { + return false; + } + return true; +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::S3QueueFailedCollection::getFileNames() +{ + S3FilesCollection failed_keys; + for (const auto & pair : files) + { + if (pair.retries_count == 0) + failed_keys.insert(pair.file_path); + } + return failed_keys; +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::parse(const String & collection_str) +{ + ReadBufferFromString rb(collection_str); + Strings result; + readQuoted(result, rb); + files = S3FilesCollection(result.begin(), result.end()); +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::add(const Strings & file_names) +{ + files.insert(file_names.begin(), file_names.end()); +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::remove(const String & file_name) +{ + files.erase(file_name); +} + +String S3QueueFilesMetadata::S3QueueProcessingCollection::toString() const +{ + return DB::toString(Strings(files.begin(), files.end())); +} + + +S3QueueFilesMetadata::S3QueueFilesMetadata( + const StorageS3Queue * storage_, + const S3QueueSettings & settings_) + : storage(storage_) + , mode(settings_.mode) + , max_set_size(settings_.s3queue_tracked_files_limit.value) + , max_set_age_sec(settings_.s3queue_tracked_file_ttl_sec.value) + , max_loading_retries(settings_.s3queue_loading_retries.value) + , zookeeper_processing_path(fs::path(storage->getZooKeeperPath()) / "processing") + , zookeeper_processed_path(fs::path(storage->getZooKeeperPath()) / "processed") + , zookeeper_failed_path(fs::path(storage->getZooKeeperPath()) / "failed") + , zookeeper_lock_path(fs::path(storage->getZooKeeperPath()) / "lock") + , log(&Poco::Logger::get("S3QueueFilesMetadata")) +{ +} + +void 
S3QueueFilesMetadata::setFileProcessed(const String & file_path) +{ + auto zookeeper = storage->getZooKeeper(); + auto lock = acquireLock(zookeeper); + + switch (mode) + { + case S3QueueMode::UNORDERED: + { + S3QueueProcessedCollection processed_files(max_set_size, max_set_age_sec); + processed_files.parse(zookeeper->get(zookeeper_processed_path)); + processed_files.add(file_path); + zookeeper->set(zookeeper_processed_path, processed_files.toString()); + break; + } + case S3QueueMode::ORDERED: + { + // Check that we set in ZooKeeper node only maximum processed file path. + // This check can be useful, when multiple table engines consume in ordered mode. + String max_file = getMaxProcessedFile(); + if (max_file.compare(file_path) <= 0) + zookeeper->set(zookeeper_processed_path, file_path); + break; + } + } + removeProcessingFile(file_path); +} + + +bool S3QueueFilesMetadata::setFileFailed(const String & file_path, const String & exception_message) +{ + auto zookeeper = storage->getZooKeeper(); + auto lock = acquireLock(zookeeper); + + S3QueueFailedCollection failed_collection(max_loading_retries); + failed_collection.parse(zookeeper->get(zookeeper_failed_path)); + const bool can_be_retried = failed_collection.add(file_path, exception_message); + zookeeper->set(zookeeper_failed_path, failed_collection.toString()); + removeProcessingFile(file_path); + return can_be_retried; +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getFailedFiles() +{ + auto zookeeper = storage->getZooKeeper(); + String failed_files = zookeeper->get(zookeeper_failed_path); + + S3QueueFailedCollection failed_collection(max_loading_retries); + failed_collection.parse(failed_files); + return failed_collection.getFileNames(); +} + +String S3QueueFilesMetadata::getMaxProcessedFile() +{ + auto zookeeper = storage->getZooKeeper(); + return zookeeper->get(zookeeper_processed_path); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getProcessingFiles() +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + if (!zookeeper->tryGet(zookeeper_processing_path, processing_files)) + return {}; + + S3QueueProcessingCollection processing_collection; + if (!processing_files.empty()) + processing_collection.parse(processing_files); + return processing_collection.getFileNames(); +} + +void S3QueueFilesMetadata::setFilesProcessing(const Strings & file_paths) +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + zookeeper->tryGet(zookeeper_processing_path, processing_files); + + S3QueueProcessingCollection processing_collection; + if (!processing_files.empty()) + processing_collection.parse(processing_files); + processing_collection.add(file_paths); + + if (zookeeper->exists(zookeeper_processing_path)) + zookeeper->set(zookeeper_processing_path, processing_collection.toString()); + else + zookeeper->create(zookeeper_processing_path, processing_collection.toString(), zkutil::CreateMode::Ephemeral); +} + +void S3QueueFilesMetadata::removeProcessingFile(const String & file_path) +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + zookeeper->tryGet(zookeeper_processing_path, processing_files); + + S3QueueProcessingCollection processing_collection; + processing_collection.parse(processing_files); + processing_collection.remove(file_path); + zookeeper->set(zookeeper_processing_path, processing_collection.toString()); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getUnorderedProcessedFiles() +{ + auto zookeeper = 
storage->getZooKeeper(); + S3QueueProcessedCollection processed_collection(max_set_size, max_set_age_sec); + processed_collection.parse(zookeeper->get(zookeeper_processed_path)); + return processed_collection.getFileNames(); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getProcessedFailedAndProcessingFiles() +{ + S3FilesCollection processed_and_failed_files = getFailedFiles(); + switch (mode) + { + case S3QueueMode::UNORDERED: + { + processed_and_failed_files.merge(getUnorderedProcessedFiles()); + break; + } + case S3QueueMode::ORDERED: + { + processed_and_failed_files.insert(getMaxProcessedFile()); + break; + } + } + processed_and_failed_files.merge(getProcessingFiles()); + return processed_and_failed_files; +} + +std::shared_ptr S3QueueFilesMetadata::acquireLock(zkutil::ZooKeeperPtr zookeeper) +{ + UInt32 retry_count = 200; + UInt32 sleep_ms = 100; + UInt32 retries = 0; + + while (true) + { + Coordination::Error code = zookeeper->tryCreate(zookeeper_lock_path, "", zkutil::CreateMode::Ephemeral); + if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) + { + retries++; + if (retries > retry_count) + { + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Can't acquire zookeeper lock"); + } + sleepForMilliseconds(sleep_ms); + } + else if (code != Coordination::Error::ZOK) + { + throw Coordination::Exception::fromPath(code, zookeeper_lock_path); + } + else + { + return zkutil::EphemeralNodeHolder::existing(zookeeper_lock_path, *zookeeper); + } + } +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.h b/src/Storages/S3Queue/S3QueueFilesMetadata.h new file mode 100644 index 00000000000..577c71b2227 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.h @@ -0,0 +1,127 @@ +#pragma once + +#if USE_AWS_S3 + +# include +# include +# include +# include + +namespace DB +{ +class StorageS3Queue; +struct S3QueueSettings; + +class S3QueueFilesMetadata +{ +public: + struct TrackedCollectionItem + { + TrackedCollectionItem() = default; + TrackedCollectionItem(const String & file_path_, UInt64 timestamp_, UInt64 retries_count_, const String & last_exception_) + : file_path(file_path_), timestamp(timestamp_), retries_count(retries_count_), last_exception(last_exception_) {} + String file_path; + UInt64 timestamp = 0; + UInt64 retries_count = 0; + String last_exception; + }; + + using S3FilesCollection = std::unordered_set; + using TrackedFiles = std::deque; + + S3QueueFilesMetadata(const StorageS3Queue * storage_, const S3QueueSettings & settings_); + + void setFilesProcessing(const Strings & file_paths); + void setFileProcessed(const String & file_path); + bool setFileFailed(const String & file_path, const String & exception_message); + + S3FilesCollection getProcessedFailedAndProcessingFiles(); + String getMaxProcessedFile(); + std::shared_ptr acquireLock(zkutil::ZooKeeperPtr zookeeper); + + struct S3QueueCollection + { + public: + virtual ~S3QueueCollection() = default; + virtual String toString() const; + S3FilesCollection getFileNames(); + + virtual void parse(const String & collection_str) = 0; + + protected: + TrackedFiles files; + + void read(ReadBuffer & in); + void write(WriteBuffer & out) const; + }; + + struct S3QueueProcessedCollection : public S3QueueCollection + { + public: + S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_); + + void parse(const String & collection_str) override; + void add(const String & file_name); + + private: + const UInt64 max_size; + const UInt64 max_age; + }; + + 
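For reference, the eviction policy that S3QueueProcessedCollection implements (see its definitions in S3QueueFilesMetadata.cpp earlier in this diff) boils down to: parse() drops entries older than max_age when a TTL is configured, and add() trims the oldest entries once the window exceeds max_size. Below is a minimal standalone sketch of that policy; the names TrackedFile, ProcessedWindow, pruneByAge and addProcessed are illustrative only and do not appear in the patch.

    #include <cstdint>
    #include <deque>
    #include <string>

    struct TrackedFile
    {
        std::string path;
        uint64_t timestamp = 0;   /// seconds since epoch, as in TrackedCollectionItem
    };

    struct ProcessedWindow
    {
        uint64_t max_size;        /// s3queue_tracked_files_limit
        uint64_t max_age;         /// s3queue_tracked_file_ttl_sec, 0 means "keep forever"
        std::deque<TrackedFile> files;

        /// Mirrors S3QueueProcessedCollection::parse(): age-based pruning happens when the
        /// collection is re-read from ZooKeeper, and only if a TTL is configured.
        void pruneByAge(uint64_t now)
        {
            if (max_age > 0)
                std::erase_if(files, [&](const TrackedFile & f) { return now - f.timestamp > max_age; });
        }

        /// Mirrors S3QueueProcessedCollection::add(): newest entries go to the back,
        /// oldest entries are evicted from the front once the size limit is exceeded.
        void addProcessed(std::string path, uint64_t now)
        {
            files.push_back({std::move(path), now});
            while (files.size() > max_size)
                files.pop_front();
        }
    };

The asymmetry (TTL enforced on parse, size cap enforced on add) is taken directly from the patch, which itself flags it with a TODO.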
struct S3QueueFailedCollection : S3QueueCollection + { + public: + S3QueueFailedCollection(const UInt64 & max_retries_count_); + + void parse(const String & collection_str) override; + bool add(const String & file_name, const String & exception_message); + + S3FilesCollection getFileNames(); + + private: + UInt64 max_retries_count; + }; + + struct S3QueueProcessingCollection + { + public: + S3QueueProcessingCollection() = default; + + void parse(const String & collection_str); + void add(const Strings & file_names); + void remove(const String & file_name); + + String toString() const; + const S3FilesCollection & getFileNames() const { return files; } + + private: + S3FilesCollection files; + }; + +private: + const StorageS3Queue * storage; + const S3QueueMode mode; + const UInt64 max_set_size; + const UInt64 max_set_age_sec; + const UInt64 max_loading_retries; + + const String zookeeper_processing_path; + const String zookeeper_processed_path; + const String zookeeper_failed_path; + const String zookeeper_lock_path; + + mutable std::mutex mutex; + Poco::Logger * log; + + S3FilesCollection getFailedFiles(); + S3FilesCollection getProcessingFiles(); + S3FilesCollection getUnorderedProcessedFiles(); + + void removeProcessingFile(const String & file_path); +}; + + +} + + +#endif diff --git a/src/Storages/S3Queue/S3QueueSettings.cpp b/src/Storages/S3Queue/S3QueueSettings.cpp new file mode 100644 index 00000000000..b74cf8d39bb --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSettings.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) + +void S3QueueSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h new file mode 100644 index 00000000000..75defc4a57f --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSettings.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ + M(S3QueueMode, \ + mode, \ + S3QueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." 
\ + "With ordered mode, only the max name of the successfully consumed file stored.", \ + 0) \ + M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ + M(String, keeper_path, "", "Zookeeper node path", 0) \ + M(UInt64, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ + M(UInt64, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + M(UInt64, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + M(UInt64, s3queue_polling_backoff_ms, 0, "Polling backoff", 0) \ + M(UInt64, s3queue_tracked_files_limit, 1000, "Max set size for tracking processed files in unordered mode in ZooKeeper", 0) \ + M(UInt64, \ + s3queue_tracked_file_ttl_sec, \ + 0, \ + "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", \ + 0) \ + M(UInt64, s3queue_polling_size, 50, "Maximum files to fetch from S3 with SELECT", 0) + +#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ + S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ + FORMAT_FACTORY_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) + + +struct S3QueueSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp new file mode 100644 index 00000000000..54a863aeb2c --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -0,0 +1,313 @@ +#include +#include +#include +#include "IO/ParallelReadBuffer.h" +#include "Parsers/ASTCreateQuery.h" +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include + +# include +# include + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include + +# include + +# include +# include +# include + +# include + +# include + +# include +# include +# include + +# include +# include + + +namespace CurrentMetrics +{ +extern const Metric StorageS3Threads; +extern const Metric StorageS3ThreadsActive; +} + +namespace ProfileEvents +{ +extern const Event S3DeleteObjects; +extern const Event S3ListObjects; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int S3_ERROR; +} + + +StorageS3QueueSource::QueueGlobIterator::QueueGlobIterator( + const S3::Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const NamesAndTypesList & virtual_columns, + ContextPtr context, + UInt64 & max_poll_size_, + const S3Settings::RequestSettings & request_settings_) + : max_poll_size(max_poll_size_) + , glob_iterator(std::make_unique( + client_, globbed_uri_, query, virtual_columns, context, nullptr, request_settings_)) +{ + /// todo(kssenii): remove this loop, it should not be here + while (true) + { + KeyWithInfo val = glob_iterator->next(); + if (val.key.empty()) + break; + keys_buf.push_back(val); + } +} + +Strings StorageS3QueueSource::QueueGlobIterator::filterProcessingFiles( + const S3QueueMode & engine_mode, std::unordered_set & exclude_keys, const String & max_file) +{ + for (const KeyWithInfo & val : keys_buf) + { + auto full_path = val.key; + if (exclude_keys.find(full_path) != exclude_keys.end()) + { + LOG_TEST(log, "File {} will be skipped, because it was found in exclude files list " + "(either already processed or failed to be processed)", val.key); + continue; + } + + if ((engine_mode == S3QueueMode::ORDERED) && (full_path.compare(max_file) <= 0)) + continue; + + if ((processing_keys.size() < max_poll_size) || (engine_mode == 
S3QueueMode::ORDERED)) + { + processing_keys.push_back(val); + } + else + { + break; + } + } + + if (engine_mode == S3QueueMode::ORDERED) + { + std::sort( + processing_keys.begin(), + processing_keys.end(), + [](const KeyWithInfo & lhs, const KeyWithInfo & rhs) { return lhs.key.compare(rhs.key) < 0; }); + + if (processing_keys.size() > max_poll_size) + { + processing_keys.erase(processing_keys.begin() + max_poll_size, processing_keys.end()); + } + } + + Strings keys; + for (const auto & key_info : processing_keys) + keys.push_back(key_info.key); + + processing_keys.push_back(KeyWithInfo()); + processing_iterator = processing_keys.begin(); + return keys; +} + + +StorageS3QueueSource::KeyWithInfo StorageS3QueueSource::QueueGlobIterator::next() +{ + std::lock_guard lock(mutex); + if (processing_iterator != processing_keys.end()) + { + return *processing_iterator++; + } + + return KeyWithInfo(); +} + +StorageS3QueueSource::StorageS3QueueSource( + const ReadFromFormatInfo & info, + const String & format_, + String name_, + ContextPtr context_, + std::optional format_settings_, + UInt64 max_block_size_, + const S3Settings::RequestSettings & request_settings_, + String compression_hint_, + const std::shared_ptr & client_, + const String & bucket_, + const String & version_id_, + const String & url_host_and_port, + std::shared_ptr file_iterator_, + std::shared_ptr files_metadata_, + const S3QueueAction & action_, + const size_t download_thread_num_) + : ISource(info.source_header) + , WithContext(context_) + , name(std::move(name_)) + , bucket(bucket_) + , version_id(version_id_) + , format(format_) + , columns_desc(info.columns_description) + , request_settings(request_settings_) + , client(client_) + , files_metadata(files_metadata_) + , requested_virtual_columns(info.requested_virtual_columns) + , requested_columns(info.requested_columns) + , file_iterator(file_iterator_) + , action(action_) +{ + internal_source = std::make_shared( + info, + format_, + name_, + context_, + format_settings_, + max_block_size_, + request_settings_, + compression_hint_, + client_, + bucket_, + version_id_, + url_host_and_port, + file_iterator, + download_thread_num_, + false, + /* query_info */ std::nullopt); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); +} + +StorageS3QueueSource::~StorageS3QueueSource() +{ + internal_source->create_reader_pool.wait(); +} + +String StorageS3QueueSource::getName() const +{ + return name; +} + +Chunk StorageS3QueueSource::generate() +{ + auto file_progress = getContext()->getFileProgressCallback(); + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + bool success_in_pulling = false; + try + { + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + auto file_path = reader.getPath(); + + for (const auto & virtual_column : requested_virtual_columns) + { + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = file_path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); + } + } + success_in_pulling = true; + } + } + catch (const Exception & e) + { + LOG_ERROR(log, "Exception in chunk pulling: {} ", e.displayText()); + 
files_metadata->setFileFailed(reader.getFile(), e.message()); + success_in_pulling = false; + } + if (success_in_pulling) + { + applyActionAfterProcessing(reader.getFile()); + files_metadata->setFileProcessed(reader.getFile()); + return chunk; + } + + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + internal_source->create_reader_pool.wait(); + reader_future = internal_source->createReaderAsync(); + } + + return {}; +} + + +void StorageS3QueueSource::applyActionAfterProcessing(const String & file_path) +{ + switch (action) + { + case S3QueueAction::DELETE: + deleteProcessedObject(file_path); + break; + case S3QueueAction::KEEP: + break; + } +} + +void StorageS3QueueSource::deleteProcessedObject(const String & file_path) +{ + LOG_INFO(log, "Delete processed file {} from bucket {}", file_path, bucket); + + S3::DeleteObjectRequest request; + request.WithKey(file_path).WithBucket(bucket); + auto outcome = client->DeleteObject(request); + if (!outcome.IsSuccess()) + { + const auto & err = outcome.GetError(); + LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); + } + else + { + LOG_TRACE(log, "Object with path {} was removed from S3", file_path); + } +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h new file mode 100644 index 00000000000..f89384fb096 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -0,0 +1,125 @@ +#pragma once +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include + +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + + +class StorageS3QueueSource : public ISource, WithContext +{ +public: + using IIterator = StorageS3Source::IIterator; + using DisclosedGlobIterator = StorageS3Source::DisclosedGlobIterator; + using KeysWithInfo = StorageS3Source::KeysWithInfo; + using KeyWithInfo = StorageS3Source::KeyWithInfo; + class QueueGlobIterator : public IIterator + { + public: + QueueGlobIterator( + const S3::Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const NamesAndTypesList & virtual_columns, + ContextPtr context, + UInt64 & max_poll_size_, + const S3Settings::RequestSettings & request_settings_ = {}); + + KeyWithInfo next() override; + + Strings + filterProcessingFiles(const S3QueueMode & engine_mode, std::unordered_set & exclude_keys, const String & max_file = ""); + + private: + UInt64 max_poll_size; + KeysWithInfo keys_buf; + KeysWithInfo processing_keys; + mutable std::mutex mutex; + std::unique_ptr glob_iterator; + std::vector::iterator processing_iterator; + + Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSourceIterator"); + }; + + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); + + StorageS3QueueSource( + const ReadFromFormatInfo & info, + const String & format, + String name_, + ContextPtr context_, + std::optional format_settings_, + UInt64 max_block_size_, + const S3Settings::RequestSettings & request_settings_, + String compression_hint_, + const std::shared_ptr & client_, + const String & bucket, + const String & version_id, + const String & url_host_and_port, + std::shared_ptr file_iterator_, + std::shared_ptr files_metadata_, + const S3QueueAction & action_, + 
size_t download_thread_num); + + ~StorageS3QueueSource() override; + + String getName() const override; + + Chunk generate() override; + + +private: + String name; + String bucket; + String version_id; + String format; + ColumnsDescription columns_desc; + S3Settings::RequestSettings request_settings; + std::shared_ptr client; + + std::shared_ptr files_metadata; + using ReaderHolder = StorageS3Source::ReaderHolder; + ReaderHolder reader; + + NamesAndTypesList requested_virtual_columns; + NamesAndTypesList requested_columns; + std::shared_ptr file_iterator; + const S3QueueAction action; + + Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSource"); + + std::future reader_future; + + mutable std::mutex mutex; + + std::shared_ptr internal_source; + void deleteProcessedObject(const String & file_path); + void applyActionAfterProcessing(const String & file_path); +}; + +} +#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp new file mode 100644 index 00000000000..23eebb6ded9 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -0,0 +1,115 @@ +#include + +#if USE_AWS_S3 + +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int METADATA_MISMATCH; +} + +S3QueueTableMetadata::S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings) +{ + format_name = configuration.format; + after_processing = engine_settings.after_processing.toString(); + mode = engine_settings.mode.toString(); + s3queue_tracked_files_limit = engine_settings.s3queue_tracked_files_limit; + s3queue_tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; +} + + +String S3QueueTableMetadata::toString() const +{ + Poco::JSON::Object json; + json.set("after_processing", after_processing); + json.set("mode", mode); + json.set("s3queue_tracked_files_limit", s3queue_tracked_files_limit); + json.set("s3queue_tracked_file_ttl_sec", s3queue_tracked_file_ttl_sec); + json.set("format_name", format_name); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + return oss.str(); +} + +void S3QueueTableMetadata::read(const String & metadata_str) +{ + Poco::JSON::Parser parser; + auto json = parser.parse(metadata_str).extract(); + after_processing = json->getValue("after_processing"); + mode = json->getValue("mode"); + s3queue_tracked_files_limit = json->getValue("s3queue_tracked_files_limit"); + s3queue_tracked_file_ttl_sec = json->getValue("s3queue_tracked_file_ttl_sec"); + format_name = json->getValue("format_name"); +} + +S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) +{ + S3QueueTableMetadata metadata; + metadata.read(metadata_str); + return metadata; +} + + +void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const +{ + if (after_processing != from_zk.after_processing) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs " + "in action after processing. Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.after_processing), + DB::toString(after_processing)); + + if (mode != from_zk.mode) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in engine mode. 
" + "Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.after_processing), + DB::toString(after_processing)); + + if (s3queue_tracked_files_limit != from_zk.s3queue_tracked_files_limit) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set size. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_tracked_files_limit, + s3queue_tracked_files_limit); + + if (s3queue_tracked_file_ttl_sec != from_zk.s3queue_tracked_file_ttl_sec) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set age. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_tracked_file_ttl_sec, + s3queue_tracked_file_ttl_sec); + + if (format_name != from_zk.format_name) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in format name. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.format_name, + format_name); +} + +void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +{ + checkImmutableFieldsEquals(from_zk); +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h new file mode 100644 index 00000000000..4b6fbc54825 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -0,0 +1,43 @@ +#pragma once + +#if USE_AWS_S3 + +# include +# include +# include + +namespace DB +{ + +class WriteBuffer; +class ReadBuffer; + +/** The basic parameters of S3Queue table engine for saving in ZooKeeper. + * Lets you verify that they match local ones. + */ +struct S3QueueTableMetadata +{ + String format_name; + String after_processing; + String mode; + UInt64 s3queue_tracked_files_limit; + UInt64 s3queue_tracked_file_ttl_sec; + + S3QueueTableMetadata() = default; + S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings); + + void read(const String & metadata_str); + static S3QueueTableMetadata parse(const String & metadata_str); + + String toString() const; + + void checkEquals(const S3QueueTableMetadata & from_zk) const; + +private: + void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; +}; + + +} + +#endif diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp new file mode 100644 index 00000000000..ee840ca2ba1 --- /dev/null +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -0,0 +1,616 @@ +#include "config.h" + + +#if USE_AWS_S3 + +# include +# include +# include +# include +# include +# include +# include +# include +# include "IO/ParallelReadBuffer.h" + +# include + +# include + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +# include + +# include +# include +# include + +# include + +# include + +# include + +# include +# include +# include +# include + +namespace fs = std::filesystem; + +namespace ProfileEvents +{ +extern const Event S3DeleteObjects; +extern const Event S3ListObjects; +} + +namespace DB +{ + +static const String PARTITION_ID_WILDCARD = "{_partition_id}"; +static const auto MAX_THREAD_WORK_DURATION_MS = 60000; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int S3_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int QUERY_NOT_ALLOWED; + extern const int REPLICA_ALREADY_EXISTS; + extern const int INCOMPATIBLE_COLUMNS; +} + + 
+StorageS3Queue::StorageS3Queue( + std::unique_ptr s3queue_settings_, + const StorageS3::Configuration & configuration_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + ContextPtr context_, + std::optional format_settings_, + ASTPtr partition_by_) + : IStorage(table_id_) + , WithContext(context_) + , s3queue_settings(std::move(s3queue_settings_)) + , after_processing(s3queue_settings->after_processing) + , configuration{configuration_} + , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) + , format_settings(format_settings_) + , partition_by(partition_by_) + , log(&Poco::Logger::get("StorageS3Queue (" + table_id_.table_name + ")")) +{ + if (configuration.url.key.ends_with('/')) + configuration.url.key += '*'; + + if (!withGlobs()) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); + + std::string zk_path_prefix = getContext()->getSettingsRef().s3queue_default_zookeeper_path.value; + if (zk_path_prefix.empty()) + zk_path_prefix = "/"; + + std::string result_zk_path; + if (s3queue_settings->keeper_path.changed) + { + /// We do not add table uuid here on purpose. + result_zk_path = fs::path(zk_path_prefix) / s3queue_settings->keeper_path.value; + } + else + { + auto database_uuid = DatabaseCatalog::instance().getDatabase(table_id_.database_name)->getUUID(); + result_zk_path = fs::path(zk_path_prefix) / toString(database_uuid) / toString(table_id_.uuid); + } + + zk_path = zkutil::extractZooKeeperPath(result_zk_path, true/* check_starts_with_slash */, log); + LOG_INFO(log, "Using zookeeper path: {}", zk_path); + + FormatFactory::instance().checkFormatName(configuration.format); + context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); + StorageInMemoryMetadata storage_metadata; + configuration.update(context_); + + if (columns_.empty()) + { + auto columns = StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); + + auto metadata_snapshot = getInMemoryMetadataPtr(); + const bool is_first_replica = createTableIfNotExists(metadata_snapshot); + + if (!is_first_replica) + { + checkTableStructure(zk_path, metadata_snapshot); + } + + files_metadata = std::make_shared(this, *s3queue_settings); + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + + auto poll_thread = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); + task = std::make_shared(std::move(poll_thread)); +} + + +bool StorageS3Queue::supportsSubcolumns() const +{ + return true; +} + +bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings); +} + +Pipe StorageS3Queue::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t /* num_streams */) +{ + if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) + throw Exception( + 
ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + + if (mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageS3Queue with attached materialized views"); + + auto query_configuration = updateConfigurationAndGetCopy(local_context); + + std::shared_ptr iterator_wrapper = createFileIterator(local_context, query_info.query); + + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + + const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; + + return Pipe(std::make_shared( + read_from_format_info, + configuration.format, + getName(), + local_context, + format_settings, + max_block_size, + query_configuration.request_settings, + configuration.compression_method, + query_configuration.client, + query_configuration.url.bucket, + query_configuration.url.version_id, + query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), + iterator_wrapper, + files_metadata, + after_processing, + max_download_threads)); +} + +SinkToStoragePtr StorageS3Queue::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Write is not supported by storage {}", getName()); +} + +void StorageS3Queue::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported by storage {}", getName()); +} + +NamesAndTypesList StorageS3Queue::getVirtuals() const +{ + return virtual_columns; +} + +bool StorageS3Queue::supportsPartitionBy() const +{ + return true; +} + +void StorageS3Queue::startup() +{ + if (task) + task->holder->activateAndSchedule(); +} + +void StorageS3Queue::shutdown() +{ + shutdown_called = true; + if (task) + { + task->stream_cancelled = true; + task->holder->deactivate(); + } +} + +size_t StorageS3Queue::getTableDependentCount() const +{ + auto table_id = getStorageID(); + // Check if at least one direct dependency is attached + return DatabaseCatalog::instance().getDependentViews(table_id).size(); +} + +bool StorageS3Queue::hasDependencies(const StorageID & table_id) +{ + // Check if all dependencies are attached + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + LOG_TEST(log, "Number of attached views {} for {}", view_ids.size(), table_id.getNameForLogs()); + + if (view_ids.empty()) + return false; + + // Check the dependencies are ready? 
+ for (const auto & view_id : view_ids) + { + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(view.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + } + + return true; +} + +void StorageS3Queue::threadFunc() +{ + bool reschedule = true; + try + { + auto table_id = getStorageID(); + + auto dependencies_count = getTableDependentCount(); + if (dependencies_count) + { + auto start_time = std::chrono::steady_clock::now(); + + mv_attached.store(true); + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!task->stream_cancelled) + { + if (!hasDependencies(table_id)) + { + /// For this case, we can not wait for watch thread to wake up + reschedule = true; + break; + } + + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + streamToViews(); + + auto ts = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(ts - start_time); + if (duration.count() > MAX_THREAD_WORK_DURATION_MS) + { + LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule."); + reschedule = true; + break; + } + + reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms; + } + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + mv_attached.store(false); + + if (reschedule && !shutdown_called) + { + LOG_TRACE(log, "Reschedule S3 Queue thread func."); + /// Reschedule with backoff. + if (reschedule_processing_interval_ms < s3queue_settings->s3queue_polling_max_timeout_ms) + reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms; + task->holder->scheduleAfter(reschedule_processing_interval_ms); + } +} + + +void StorageS3Queue::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); + if (!table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); + + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + size_t block_size = 100; + + auto s3queue_context = Context::createCopy(getContext()); + s3queue_context->makeQueryContext(); + auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); + + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + auto block_io = interpreter.execute(); + auto column_names = block_io.pipeline.getHeader().getNames(); + + // Create a stream for each consumer and join them in a union stream + + std::shared_ptr iterator_wrapper = createFileIterator(s3queue_context, nullptr); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(getContext()), getVirtuals()); + const size_t max_download_threads = s3queue_context->getSettingsRef().max_download_threads; + + auto pipe = Pipe(std::make_shared( + read_from_format_info, + configuration.format, + getName(), + s3queue_context, + format_settings, + block_size, + query_configuration.request_settings, + configuration.compression_method, + 
query_configuration.client, + query_configuration.url.bucket, + query_configuration.url.version_id, + query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), + iterator_wrapper, + files_metadata, + after_processing, + max_download_threads)); + + + std::atomic_size_t rows = 0; + { + block_io.pipeline.complete(std::move(pipe)); + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } +} + +StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) +{ + configuration.update(local_context); + return configuration; +} + +zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const +{ + std::lock_guard lock{zk_mutex}; + if (!zk_client || zk_client->expired()) + { + zk_client = getContext()->getZooKeeper(); + zk_client->sync(zk_path); + } + return zk_client; +} + + +bool StorageS3Queue::createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot) +{ + auto zookeeper = getZooKeeper(); + zookeeper->createAncestors(zk_path); + + for (size_t i = 0; i < zk_create_table_retries; ++i) + { + Coordination::Requests ops; + bool is_first_replica = true; + if (zookeeper->exists(zk_path + "/metadata")) + { + if (!zookeeper->exists(zk_path + "/processing")) + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral)); + LOG_DEBUG(log, "This table {} is already created, will use existing metadata for checking engine settings", zk_path); + is_first_replica = false; + } + else + { + String metadata_str = S3QueueTableMetadata(configuration, *s3queue_settings).toString(); + ops.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processed", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/failed", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral)); + ops.emplace_back(zkutil::makeCreateRequest( + zk_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent)); + + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); + } + + Coordination::Responses responses; + auto code = zookeeper->tryMulti(ops, responses); + if (code == Coordination::Error::ZNODEEXISTS) + { + LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zk_path); + continue; + } + else if (code != Coordination::Error::ZOK) + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + + return is_first_replica; + } + + throw Exception( + ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zk_path or because of logical error"); +} + + +/** Verify that list of columns and table settings match those specified in ZK (/metadata). + * If not, throw an exception. 
+ */ +void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot) +{ + auto zookeeper = getZooKeeper(); + + S3QueueTableMetadata old_metadata(configuration, *s3queue_settings); + + Coordination::Stat metadata_stat; + String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata", &metadata_stat); + auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); + old_metadata.checkEquals(metadata_from_zk); + + Coordination::Stat columns_stat; + auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_prefix) / "columns", &columns_stat)); + + const ColumnsDescription & old_columns = metadata_snapshot->getColumns(); + if (columns_from_zk != old_columns) + { + throw Exception( + ErrorCodes::INCOMPATIBLE_COLUMNS, + "Table columns structure in ZooKeeper is different from local table structure. Local columns:\n" + "{}\nZookeeper columns:\n{}", + old_columns.toString(), + columns_from_zk.toString()); + } +} + + +std::shared_ptr +StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query) +{ + auto it = std::make_shared( + *configuration.client, + configuration.url, + query, + virtual_columns, + local_context, + s3queue_settings->s3queue_polling_size.value, + configuration.request_settings); + + auto zookeeper = getZooKeeper(); + auto lock = files_metadata->acquireLock(zookeeper); + S3QueueFilesMetadata::S3FilesCollection files_to_skip = files_metadata->getProcessedFailedAndProcessingFiles(); + + Strings files_to_process; + if (s3queue_settings->mode == S3QueueMode::UNORDERED) + { + files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip); + } + else + { + String max_processed_file = files_metadata->getMaxProcessedFile(); + files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip, max_processed_file); + } + + LOG_TEST(log, "Found files to process: {}", fmt::join(files_to_process, ", ")); + + files_metadata->setFilesProcessing(files_to_process); + return it; +} + +void StorageS3Queue::drop() +{ + auto zookeeper = getZooKeeper(); + if (zookeeper->exists(zk_path)) + zookeeper->removeRecursive(zk_path); +} + +void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) +{ + factory.registerStorage( + name, + [](const StorageFactory::Arguments & args) + { + if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_s3queue) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3Queue is experimental. You can enable it with the `allow_experimental_s3queue` setting."); + + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto s3queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + s3queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. 
+ const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + else + LOG_TRACE(&Poco::Logger::get("StorageS3"), "Remove: {}", change.name); + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared( + std::move(s3queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + partition_by); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3Queue(StorageFactory & factory) +{ + return registerStorageS3QueueImpl("S3Queue", factory); +} + +} + + +#endif diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h new file mode 100644 index 00000000000..98d46e80ad2 --- /dev/null +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -0,0 +1,145 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +namespace Aws::S3 +{ +class Client; +} + +namespace DB +{ + + +class StorageS3Queue : public IStorage, WithContext +{ +public: + using Configuration = typename StorageS3::Configuration; + + StorageS3Queue( + std::unique_ptr s3queue_settings_, + const Configuration & configuration_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + ContextPtr context_, + std::optional format_settings_, + ASTPtr partition_by_ = nullptr); + + String getName() const override { return "S3Queue"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + ContextPtr /*local_context*/, + TableExclusiveLockHolder &) override; + + NamesAndTypesList getVirtuals() const override; + + bool supportsPartitionBy() const override; + + const auto & getFormatName() const { return configuration.format; } + + const String & getZooKeeperPath() const { return zk_path; } + + zkutil::ZooKeeperPtr getZooKeeper() const; + +private: + const std::unique_ptr s3queue_settings; + const S3QueueAction after_processing; + + std::shared_ptr files_metadata; + Configuration configuration; + NamesAndTypesList virtual_columns; + UInt64 reschedule_processing_interval_ms; + + std::optional format_settings; + ASTPtr 
partition_by; + + String zk_path; + mutable zkutil::ZooKeeperPtr zk_client; + mutable std::mutex zk_mutex; + + std::atomic<bool> mv_attached = false; + std::atomic<bool> shutdown_called{false}; + Poco::Logger * log; + + bool supportsSubcolumns() const override; + bool withGlobs() const { return configuration.url.key.find_first_of("*?{") != std::string::npos; } + + void threadFunc(); + size_t getTableDependentCount() const; + bool hasDependencies(const StorageID & table_id); + + void startup() override; + void shutdown() override; + void drop() override; + + struct TaskContext + { + BackgroundSchedulePool::TaskHolder holder; + std::atomic<bool> stream_cancelled{false}; + explicit TaskContext(BackgroundSchedulePool::TaskHolder && task_) : holder(std::move(task_)) { } + }; + std::shared_ptr<TaskContext> task; + + bool supportsSubsetOfColumns(const ContextPtr & context_) const; + + const UInt32 zk_create_table_retries = 1000; + bool createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot); + void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot); + + using KeysWithInfo = StorageS3QueueSource::KeysWithInfo; + + std::shared_ptr + createFileIterator(ContextPtr local_context, ASTPtr query); + + void streamToViews(); + Configuration updateConfigurationAndGetCopy(ContextPtr local_context); +}; + +} + +#endif diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 13d6909fd52..6d52d45c6a9 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -10,7 +10,6 @@ #include #include #include -#include #include @@ -211,8 +210,6 @@ struct SelectQueryInfo /// should we use custom key with the cluster bool use_custom_key = false; - mutable ParallelReplicasReadingCoordinatorPtr coordinator; - TreeRewriterResultPtr syntax_analyzer_result; /// This is an additional filer applied to current table. @@ -258,11 +255,15 @@ struct SelectQueryInfo bool parallel_replicas_disabled = false; bool is_parameterized_view = false; - NameToNameMap parameterized_view_values; + + bool optimize_trivial_count = false; // If limit is not 0, that means it's a trivial limit query. UInt64 limit = 0; + /// For IStorageSystemOneBlock + std::vector<UInt8> columns_mask; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? 
projection->input_order_info : nullptr); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 0880d058cb5..bdefe781ee6 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -18,20 +17,18 @@ #include #include -#include #include #include +#include #include -#include +#include #include #include #include #include -#include #include #include -#include #include #include #include @@ -51,6 +48,11 @@ namespace CurrentMetrics extern const Metric ObjectStorageAzureThreadsActive; } +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} + namespace DB { @@ -87,7 +89,7 @@ const std::unordered_set optional_configuration_keys = { bool isConnectionString(const std::string & candidate) { - return candidate.starts_with("DefaultEndpointsProtocol"); + return !candidate.starts_with("http"); } } @@ -258,7 +260,7 @@ void registerStorageAzureBlob(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. @@ -300,6 +302,7 @@ void registerStorageAzureBlob(StorageFactory & factory) args.constraints, args.comment, format_settings, + /* distributed_processing */ false, partition_by); }, { @@ -310,58 +313,113 @@ void registerStorageAzureBlob(StorageFactory & factory) }); } -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration) +static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) +{ + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + for (const auto & container : containers_list) + { + if (container_name == container.Name) + return true; + } + return false; +} + +AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) { AzureClientPtr result; if (configuration.is_connection_string) { + std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - result->CreateIfNotExists(); - } - else - { - if (configuration.account_name.has_value() && configuration.account_key.has_value()) + bool container_exists = containerExists(blob_service_client,configuration.container); + + if (!container_exists) { - auto storage_shared_key_credential = std::make_shared(*configuration.account_name, *configuration.account_key); - auto blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); + try { - result = 
std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) + result->CreateIfNotExists(); + } catch (const Azure::Storage::StorageException & e) { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - { - auto final_url = configuration.connection_url - + (configuration.connection_url.back() == '/' ? "" : "/") - + configuration.container; - - result = std::make_unique(final_url, storage_shared_key_credential); - } - else + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) { throw; } } } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (configuration.account_name.has_value() && configuration.account_key.has_value()) + { + storage_shared_key_credential + = std::make_shared(*configuration.account_name, *configuration.account_key); + } + + std::unique_ptr blob_service_client; + if (storage_shared_key_credential) + { + blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); + } else { - auto managed_identity_credential = std::make_shared(); - auto blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); + blob_service_client = std::make_unique(configuration.connection_url); + } + + bool container_exists = containerExists(blob_service_client,configuration.container); + + std::string final_url; + size_t pos = configuration.connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = configuration.connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container + + configuration.connection_url.substr(pos); + } + else + final_url + = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; + + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); try { result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) + } catch (const Azure::Storage::StorageException & e) { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") { - auto final_url = configuration.connection_url - + (configuration.connection_url.back() == '/' ? 
"" : "/") - + configuration.container; - - result = std::make_unique(final_url, managed_identity_credential); + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); } else { @@ -393,12 +451,13 @@ StorageAzureBlob::StorageAzureBlob( const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, + bool distributed_processing_, ASTPtr partition_by_) : IStorage(table_id_) , name("AzureBlobStorage") , configuration(configuration_) , object_storage(std::move(object_storage_)) - , distributed_processing(false) + , distributed_processing(distributed_processing_) , format_settings(format_settings_) , partition_by(partition_by_) { @@ -408,7 +467,7 @@ StorageAzureBlob::StorageAzureBlob( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); + auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); storage_metadata.setColumns(columns); } else @@ -422,15 +481,7 @@ StorageAzureBlob::StorageAzureBlob( for (const auto & key : configuration.blobs_paths) objects.emplace_back(key); - auto default_virtuals = NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; - - auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); - - virtual_columns = getVirtualsForStorage(columns, default_virtuals); - for (const auto & column : virtual_columns) - virtual_block.insert({column.type->createColumn(), column.type, column.name}); + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) @@ -439,7 +490,7 @@ void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { throw Exception( ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", + "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); } @@ -604,7 +655,7 @@ private: Pipe StorageAzureBlob::read( const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, @@ -616,68 +667,46 @@ Pipe StorageAzureBlob::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper; - if (configuration.withGlobs()) + if (distributed_processing) + { + iterator_wrapper = std::make_shared(local_context, + local_context->getReadTaskCallback()); + } + else if (configuration.withGlobs()) { /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, configuration.blob_path, - query_info.query, virtual_block, local_context, nullptr); + query_info.query, virtual_columns, local_context, nullptr, local_context->getFileProgressCallback()); } else { 
iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, configuration.blobs_paths, - query_info.query, virtual_block, local_context, nullptr); + query_info.query, virtual_columns, local_context, nullptr, local_context->getFileProgressCallback()); } - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, configuration.compression_method, object_storage.get(), configuration.container, - iterator_wrapper)); + configuration.connection_url, + iterator_wrapper, + need_only_count, + query_info)); } return Pipe::unitePipes(std::move(pipes)); @@ -763,14 +792,9 @@ bool StorageAzureBlob::supportsPartitionBy() const return true; } -bool StorageAzureBlob::supportsSubcolumns() const +bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - -bool StorageAzureBlob::supportsSubsetOfColumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); } bool StorageAzureBlob::prefersLargeBlocks() const @@ -783,38 +807,23 @@ bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); } -static void addPathToVirtualColumns(Block & block, const String & path, size_t idx) -{ - if (block.has("_path")) - block.getByName("_path").column->assumeMutableRef().insert(path); - - if (block.has("_file")) - { - auto pos = path.find_last_of('/'); - assert(pos != std::string::npos); - - auto file = path.substr(pos + 1); - block.getByName("_file").column->assumeMutableRef().insert(file); - } - - block.getByName("_idx").column->assumeMutableRef().insert(idx); -} - StorageAzureBlobSource::GlobIterator::GlobIterator( AzureObjectStorage * object_storage_, const std::string & container_, String blob_path_with_globs_, ASTPtr query_, - const Block & virtual_header_, + const NamesAndTypesList & virtual_columns_, ContextPtr 
context_, - RelativePathsWithMetadata * outer_blobs_) + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_) : IIterator(context_) , object_storage(object_storage_) , container(container_) , blob_path_with_globs(blob_path_with_globs_) , query(query_) - , virtual_header(virtual_header_) + , virtual_columns(virtual_columns_) , outer_blobs(outer_blobs_) + , file_progress_callback(file_progress_callback_) { const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); @@ -826,6 +835,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( blobs_with_metadata.emplace_back(blob_path_with_globs, object_metadata); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata.back()); + is_finished = true; return; } @@ -844,8 +854,10 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() { std::lock_guard lock(next_mutex); - if (is_finished) + if (is_finished && index >= blobs_with_metadata.size()) + { return {}; + } bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); @@ -877,36 +889,28 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() index = 0; if (!is_initialized) { - createFilterAST(new_batch.front().relative_path); + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(container) / new_batch.front().relative_path, getContext()); is_initialized = true; } if (filter_ast) { - auto block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < new_batch.size(); ++i) - addPathToVirtualColumns(block, fs::path(container) / new_batch[i].relative_path, i); + std::vector paths; + paths.reserve(new_batch.size()); + for (auto & path_with_metadata : new_batch) + paths.push_back(fs::path(container) / path_with_metadata.relative_path); - VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const auto & idxs = typeid_cast(*block.getByName("_idx").column); - - blobs_with_metadata.clear(); - for (UInt64 idx : idxs.getData()) - { - total_size.fetch_add(new_batch[idx].metadata.size_bytes, std::memory_order_relaxed); - blobs_with_metadata.emplace_back(std::move(new_batch[idx])); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - } + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, query, virtual_columns, getContext(), filter_ast); } - else - { - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - blobs_with_metadata = std::move(new_batch); + if (outer_blobs) + outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); + + blobs_with_metadata = std::move(new_batch); + if (file_progress_callback) + { for (const auto & [_, info] : blobs_with_metadata) - total_size.fetch_add(info.size_bytes, std::memory_order_relaxed); + file_progress_callback(FileProgress(0, info.size_bytes)); } } @@ -916,77 +920,42 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() return blobs_with_metadata[current_index]; } -size_t StorageAzureBlobSource::GlobIterator::getTotalSize() const -{ - return total_size.load(std::memory_order_relaxed); -} - - -void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_key) -{ - if (!query || !virtual_header) - return; - - /// Create a virtual block with one row to construct filter - /// Append "idx" column as the filter result - virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); - - auto block = virtual_header.cloneEmpty(); - 
addPathToVirtualColumns(block, fs::path(container) / any_key, 0); - VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); -} - - StorageAzureBlobSource::KeysIterator::KeysIterator( AzureObjectStorage * object_storage_, const std::string & container_, - Strings keys_, + const Strings & keys_, ASTPtr query_, - const Block & virtual_header_, + const NamesAndTypesList & virtual_columns_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_) + RelativePathsWithMetadata * outer_blobs, + std::function file_progress_callback) : IIterator(context_) , object_storage(object_storage_) , container(container_) , query(query_) - , virtual_header(virtual_header_) - , outer_blobs(outer_blobs_) + , virtual_columns(virtual_columns_) { Strings all_keys = keys_; - /// Create a virtual block with one row to construct filter - if (query && virtual_header && !all_keys.empty()) + ASTPtr filter_ast; + if (!all_keys.empty()) + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(container) / all_keys[0], getContext()); + + if (filter_ast) { - /// Append "idx" column as the filter result - virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); + Strings paths; + paths.reserve(all_keys.size()); + for (const auto & key : all_keys) + paths.push_back(fs::path(container) / key); - auto block = virtual_header.cloneEmpty(); - addPathToVirtualColumns(block, fs::path(container) / all_keys.front(), 0); - - VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); - - if (filter_ast) - { - block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < all_keys.size(); ++i) - addPathToVirtualColumns(block, fs::path(container) / all_keys[i], i); - - VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const auto & idxs = typeid_cast(*block.getByName("_idx").column); - - Strings filtered_keys; - filtered_keys.reserve(block.rows()); - for (UInt64 idx : idxs.getData()) - filtered_keys.emplace_back(std::move(all_keys[idx])); - - all_keys = std::move(filtered_keys); - } + VirtualColumnUtils::filterByPathOrFile(all_keys, paths, query, virtual_columns, getContext(), filter_ast); } for (auto && key : all_keys) { ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - total_size += object_metadata.size_bytes; + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); keys.emplace_back(RelativePathWithMetadata{key, object_metadata}); } @@ -1003,12 +972,6 @@ RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() return keys[current_index]; } -size_t StorageAzureBlobSource::KeysIterator::getTotalSize() const -{ - return total_size.load(std::memory_order_relaxed); -} - - Chunk StorageAzureBlobSource::generate() { while (true) @@ -1024,34 +987,19 @@ Chunk StorageAzureBlobSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); - - const auto & file_path = reader.getPath(); - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - - for (const auto & virtual_column : requested_virtual_columns) - { - if (virtual_column.name == "_path") - { - chunk.addColumn(virtual_column.type->createColumnConst(num_rows, 
file_path)->convertToFullColumnIfConst()); - } - else if (virtual_column.name == "_file") - { - size_t last_slash_pos = file_path.find_last_of('/'); - auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - } - + total_rows_in_file += num_rows; + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, fs::path(container) / reader.getRelativePath()); return chunk; } + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); + + total_rows_in_file = 0; assert(reader_future.valid()); reader = reader_future.get(); @@ -1059,13 +1007,6 @@ Chunk StorageAzureBlobSource::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. create_reader_pool.wait(); @@ -1075,52 +1016,65 @@ Chunk StorageAzureBlobSource::generate() return {}; } -Block StorageAzureBlobSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) +void StorageAzureBlobSource::addNumRowsToCache(const DB::String & path, size_t num_rows) { - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); + String source = fs::path(connection_url) / container / path; + auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); + StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); +} - return sample_block; +std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) +{ + String source = fs::path(connection_url) / container / path_with_metadata.relative_path; + auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); + auto get_last_mod_time = [&]() -> std::optional + { + auto last_mod = path_with_metadata.metadata.last_modified; + if (last_mod) + return last_mod->epochTime(); + return std::nullopt; + }; + + return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); } StorageAzureBlobSource::StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, const String & container_, - std::shared_ptr file_iterator_) - :ISource(getHeader(sample_block_, requested_virtual_columns_)) + const String & connection_url_, + std::shared_ptr file_iterator_, + bool need_only_count_, + const SelectQueryInfo & query_info_) + 
:ISource(info.source_header, false) , WithContext(context_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , format(format_) , name(std::move(name_)) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , columns_desc(columns_) + , columns_desc(info.columns_description) , max_block_size(max_block_size_) , compression_hint(compression_hint_) , object_storage(std::move(object_storage_)) , container(container_) + , connection_url(connection_url_) , file_iterator(file_iterator_) + , need_only_count(need_only_count_) + , query_info(query_info_) , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, 1) , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "AzureReader")) { reader = createReader(); if (reader) - { - const auto & read_buf = reader.getReadBuffer(); - if (read_buf) - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - reader_future = createReaderAsync(); - } } @@ -1136,33 +1090,69 @@ String StorageAzureBlobSource::getName() const StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() { - auto [current_key, info] = file_iterator->next(); - if (current_key.empty()) + auto path_with_metadata = file_iterator->next(); + if (path_with_metadata.relative_path.empty()) return {}; - size_t object_size = info.size_bytes != 0 ? info.size_bytes : object_storage->getObjectMetadata(current_key).size_bytes; - auto compression_method = chooseCompressionMethod(current_key, compression_hint); - - auto read_buf = createAzureReadBuffer(current_key, object_size); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, std::nullopt, std::nullopt, - /* is_remote_fs */ true, compression_method); + if (path_with_metadata.metadata.size_bytes == 0) + path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); QueryPipelineBuilder builder; - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) + std::shared_ptr source; + std::unique_ptr read_buf; + std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; + if (num_rows_from_cache) { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
+ source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); + builder.init(Pipe(source)); } + else + { + std::optional max_parsing_threads; + if (need_only_count) + max_parsing_threads = 1; + + auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); + read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); + auto input_format = FormatFactory::instance().getInput( + format, *read_buf, sample_block, getContext(), max_block_size, + format_settings, max_parsing_threads, std::nullopt, + /* is_remote_fs */ true, compression_method); + input_format->setQueryInfo(query_info, getContext()); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { return std::make_shared(header, columns_desc, *input_format, getContext()); }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; } std::future StorageAzureBlobSource::createReaderAsync() @@ -1189,70 +1179,126 @@ std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); } +namespace +{ + class ReadBufferIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferIterator( + const std::shared_ptr & file_iterator_, + AzureObjectStorage * object_storage_, + const StorageAzureBlob::Configuration & configuration_, + const std::optional & format_settings_, + const RelativePathsWithMetadata & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , file_iterator(file_iterator_) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) + { + } + + std::unique_ptr next() override + { + auto [key, metadata] = file_iterator->next(); + + if (key.empty()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because there are no files with provided path " + "in AzureBlobStorage. You must specify table structure manually", configuration.format); + + return nullptr; + } + + current_path = key; + + ///AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache. 
+ if (getContext()->getSettingsRef().schema_inference_use_cache_for_azure && read_keys.size() > prev_read_keys_size) + { + columns_from_cache = StorageAzureBlob::tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, getContext()); + prev_read_keys_size = read_keys.size(); + if (columns_from_cache) + return nullptr; + } + + first = false; + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod( + object_storage->readObject(StoredObject(key), getContext()->getReadSettings(), {}, metadata.size_bytes), + chooseCompressionMethod(key, configuration.compression_method), + zstd_window_log_max); + } + + std::optional getCachedColumns() override { return columns_from_cache; } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + return; + + String source = fs::path(configuration.connection_url) / configuration.container / current_path; + auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + private: + std::shared_ptr file_iterator; + AzureObjectStorage * object_storage; + const StorageAzureBlob::Configuration & configuration; + const std::optional & format_settings; + const RelativePathsWithMetadata & read_keys; + std::optional columns_from_cache; + size_t prev_read_keys_size; + String current_path; + bool first = true; + }; +} + ColumnsDescription StorageAzureBlob::getTableStructureFromData( AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + ContextPtr ctx, + bool distributed_processing) { RelativePathsWithMetadata read_keys; std::shared_ptr file_iterator; - if (configuration.withGlobs()) + if (distributed_processing) + { + file_iterator = std::make_shared(ctx, + ctx->getReadTaskCallback()); + } + else if (configuration.withGlobs()) { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, Block{}, ctx, &read_keys); + object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); } else { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, Block{}, ctx, &read_keys); + object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); } std::optional columns_from_cache; - size_t prev_read_keys_size = read_keys.size(); if (ctx->getSettingsRef().schema_inference_use_cache_for_azure) columns_from_cache = tryGetColumnsFromCache(read_keys.begin(), read_keys.end(), configuration, format_settings, ctx); - ReadBufferIterator read_buffer_iterator = [&, first = true](ColumnsDescription & cached_columns) mutable -> std::unique_ptr - { - auto [key, metadata] = file_iterator->next(); - - if (key.empty()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in AzureBlobStorage. You must specify table structure manually", configuration.format); - - return nullptr; - } - - /// S3 file iterator could get new keys after new iteration, check them in schema cache. 
- if (ctx->getSettingsRef().schema_inference_use_cache_for_azure && read_keys.size() > prev_read_keys_size) - { - columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, ctx); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - { - cached_columns = *columns_from_cache; - return nullptr; - } - } - - first = false; - int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(key), ctx->getReadSettings(), {}, metadata.size_bytes), - chooseCompressionMethod(key, configuration.compression_method), - zstd_window_log_max); - }; - ColumnsDescription columns; if (columns_from_cache) + { columns = *columns_from_cache; + } else + { + ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx); columns = readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); + } if (ctx->getSettingsRef().schema_inference_use_cache_for_azure) addColumnsToCache(read_keys, columns, configuration, format_settings, configuration.format, ctx); @@ -1271,15 +1317,17 @@ std::optional StorageAzureBlob::tryGetColumnsFromCache( auto & schema_cache = getSchemaCache(ctx); for (auto it = begin; it < end; ++it) { - auto get_last_mod_time = [&] -> time_t + auto get_last_mod_time = [&] -> std::optional { - return it->metadata.last_modified->epochTime(); + if (it->metadata.last_modified) + return it->metadata.last_modified->epochTime(); + return std::nullopt; }; auto host_and_bucket = configuration.connection_url + '/' + configuration.container; String source = host_and_bucket + '/' + it->relative_path; auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, ctx); - auto columns = schema_cache.tryGet(cache_key, get_last_mod_time); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); if (columns) return columns; } @@ -1302,7 +1350,7 @@ void StorageAzureBlob::addColumnsToCache( std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); auto cache_keys = getKeysForSchemaCache(sources, format_name, format_settings, ctx); auto & schema_cache = getSchemaCache(ctx); - schema_cache.addMany(cache_keys, columns); + schema_cache.addManyColumns(cache_keys, columns); } SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 3d6b0c64998..448d8e20e05 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -10,7 +10,10 @@ #include #include #include +#include #include +#include +#include namespace DB { @@ -62,10 +65,11 @@ public: const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, + bool distributed_processing_, ASTPtr partition_by_); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration); + static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); @@ -93,9 +97,11 @@ public: bool supportsPartitionBy() const override; - bool supportsSubcolumns() const override; + bool 
supportsSubcolumns() const override { return true; } - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool supportsTrivialCountOptimization() const override { return true; } bool prefersLargeBlocks() const override; @@ -107,19 +113,8 @@ public: AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); - -private: - std::string name; - Configuration configuration; - std::unique_ptr object_storage; - NamesAndTypesList virtual_columns; - Block virtual_block; - - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; - + ContextPtr ctx, + bool distributed_processing = false); static std::optional tryGetColumnsFromCache( const RelativePathsWithMetadata::const_iterator & begin, @@ -136,7 +131,15 @@ private: const String & format_name, const ContextPtr & ctx); +private: + std::string name; + Configuration configuration; + std::unique_ptr object_storage; + NamesAndTypesList virtual_columns; + const bool distributed_processing; + std::optional format_settings; + ASTPtr partition_by; }; class StorageAzureBlobSource : public ISource, WithContext @@ -148,7 +151,6 @@ public: IIterator(ContextPtr context_):WithContext(context_) {} virtual ~IIterator() = default; virtual RelativePathWithMetadata next() = 0; - virtual size_t getTotalSize() const = 0; RelativePathWithMetadata operator ()() { return next(); } }; @@ -161,24 +163,23 @@ public: const std::string & container_, String blob_path_with_globs_, ASTPtr query_, - const Block & virtual_header_, + const NamesAndTypesList & virtual_columns_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_ = {}); RelativePathWithMetadata next() override; - size_t getTotalSize() const override; ~GlobIterator() override = default; - private: + private: AzureObjectStorage * object_storage; std::string container; String blob_path_with_globs; ASTPtr query; ASTPtr filter_ast; - Block virtual_header; + NamesAndTypesList virtual_columns; size_t index = 0; - std::atomic total_size = 0; RelativePathsWithMetadata blobs_with_metadata; RelativePathsWithMetadata * outer_blobs; @@ -191,6 +192,23 @@ public: bool is_finished = false; bool is_initialized = false; std::mutex next_mutex; + + std::function file_progress_callback; + }; + + class ReadIterator : public IIterator + { + public: + explicit ReadIterator(ContextPtr context_, + const ReadTaskCallback & callback_) + : IIterator(context_), callback(callback_) { } + RelativePathWithMetadata next() override + { + return {callback(), {}}; + } + + private: + ReadTaskCallback callback; }; class KeysIterator : public IIterator @@ -199,14 +217,14 @@ public: KeysIterator( AzureObjectStorage * object_storage_, const std::string & container_, - Strings keys_, + const Strings & keys_, ASTPtr query_, - const Block & virtual_header_, + const NamesAndTypesList & virtual_columns_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs, + std::function file_progress_callback = {}); RelativePathWithMetadata next() override; - size_t getTotalSize() const override; ~KeysIterator() override = default; private: @@ -215,39 +233,37 @@ public: RelativePathsWithMetadata keys; ASTPtr query; - ASTPtr filter_ast; - Block virtual_header; + NamesAndTypesList virtual_columns; std::atomic index = 0; - std::atomic total_size = 0; - - 
RelativePathsWithMetadata * outer_blobs; }; StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, const String & container_, - std::shared_ptr file_iterator_); - + const String & connection_url_, + std::shared_ptr file_iterator_, + bool need_only_count_, + const SelectQueryInfo & query_info_); ~StorageAzureBlobSource() override; Chunk generate() override; String getName() const override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - private: - std::vector requested_virtual_columns; + void addNumRowsToCache(const String & path, size_t num_rows); + std::optional tryGetNumRowsFromCache(const RelativePathWithMetadata & path_with_metadata); + + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; String format; String name; Block sample_block; @@ -257,20 +273,24 @@ private: String compression_hint; AzureObjectStorage * object_storage; String container; + String connection_url; std::shared_ptr file_iterator; + bool need_only_count; + size_t total_rows_in_file = 0; + SelectQueryInfo query_info; struct ReaderHolder { public: ReaderHolder( - String path_, + RelativePathWithMetadata relative_path_with_metadata_, std::unique_ptr read_buf_, - std::shared_ptr input_format_, + std::shared_ptr source_, std::unique_ptr pipeline_, std::unique_ptr reader_) - : path(std::move(path_)) + : relative_path_with_metadata(std::move(relative_path_with_metadata_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) + , source(std::move(source_)) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -291,25 +311,23 @@ private: /// reader uses pipeline, pipeline uses read_buf. reader = std::move(other.reader); pipeline = std::move(other.pipeline); - input_format = std::move(other.input_format); + source = std::move(other.source); read_buf = std::move(other.read_buf); - path = std::move(other.path); + relative_path_with_metadata = std::move(other.relative_path_with_metadata); return *this; } explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getPath() const { return path; } - - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } + const String & getRelativePath() const { return relative_path_with_metadata.relative_path; } + const RelativePathWithMetadata & getRelativePathWithMetadata() const { return relative_path_with_metadata; } + const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } private: - String path; + RelativePathWithMetadata relative_path_with_metadata; std::unique_ptr read_buf; - std::shared_ptr input_format; + std::shared_ptr source; std::unique_ptr pipeline; std::unique_ptr reader; }; @@ -322,11 +340,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. 
ReaderHolder createReader(); std::future createReaderAsync(); diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp new file mode 100644 index 00000000000..7592a6e6acc --- /dev/null +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -0,0 +1,89 @@ +#include "Storages/StorageAzureBlobCluster.h" + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +StorageAzureBlobCluster::StorageAzureBlobCluster( + const String & cluster_name_, + const StorageAzureBlob::Configuration & configuration_, + std::unique_ptr && object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + , configuration{configuration_} + , object_storage(std::move(object_storage_)) +{ + context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + StorageInMemoryMetadata storage_metadata; + + if (columns_.empty()) + { + /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function + auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); +} + +void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); + + TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); +} + +RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const +{ + auto iterator = std::make_shared( + object_storage.get(), configuration.container, configuration.blob_path, + query, virtual_columns, context, nullptr); + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + +NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const +{ + return virtual_columns; +} + + +} + +#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h new file mode 100644 index 00000000000..2900243708c --- /dev/null +++ b/src/Storages/StorageAzureBlobCluster.h @@ -0,0 +1,56 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include + +#include "Client/Connection.h" +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageAzureBlobCluster : public 
IStorageCluster +{ +public: + StorageAzureBlobCluster( + const String & cluster_name_, + const StorageAzureBlob::Configuration & configuration_, + std::unique_ptr && object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_); + + std::string getName() const override { return "AzureBlobStorageCluster"; } + + NamesAndTypesList getVirtuals() const override; + + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + +private: + void updateBeforeRead(const ContextPtr & /*context*/) override {} + + void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + + StorageAzureBlob::Configuration configuration; + NamesAndTypesList virtual_columns; + std::unique_ptr object_storage; +}; + + +} + +#endif diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d021667f771..e011565edc1 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -682,7 +681,7 @@ void StorageBuffer::startup() } -void StorageBuffer::flush() +void StorageBuffer::flushAndPrepareForShutdown() { if (!flush_handle) return; @@ -996,8 +995,11 @@ void StorageBuffer::reschedule() std::unique_lock lock(buffer.tryLock()); if (lock.owns_lock()) { - min_first_write_time = buffer.first_write_time; - rows += buffer.data.rows(); + if (buffer.data) + { + min_first_write_time = std::min(min_first_write_time, buffer.first_write_time); + rows += buffer.data.rows(); + } } } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 8f089a4d580..db3cde93be5 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -92,7 +92,7 @@ public: void startup() override; /// Flush all buffers into the subordinate table and stop background thread. - void flush() override; + void flushAndPrepareForShutdown() override; bool optimize( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index b0b8aba38c7..09f972e4098 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -145,7 +145,7 @@ StorageDictionary::~StorageDictionary() removeDictionaryConfigurationFromRepository(); } -void StorageDictionary::checkTableCanBeDropped() const +void StorageDictionary::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { if (location == Location::SameDatabaseAndNameAsDictionary) throw Exception(ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE, @@ -159,7 +159,9 @@ void StorageDictionary::checkTableCanBeDropped() const void StorageDictionary::checkTableCanBeDetached() const { - checkTableCanBeDropped(); + /// Actually query context (from DETACH query) should be passed here. 
+ /// But we don't use it for this type of storage + checkTableCanBeDropped(getContext()); } Pipe StorageDictionary::read( diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 48230dcfa9f..7d3ed01d185 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -62,7 +62,7 @@ public: ~StorageDictionary() override; - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void checkTableCanBeDetached() const override; Pipe read( diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index dce54d69327..a9b19976033 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -331,6 +331,9 @@ StorageDistributed::StorageDistributed( , distributed_settings(distributed_settings_) , rng(randomSeed()) { + if (!distributed_settings.flush_on_detach && distributed_settings.monitor_batch_inserts) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Settings flush_on_detach=0 and monitor_batch_inserts=1 are incompatible"); + StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { @@ -434,7 +437,9 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( { /// Always calculate optimized cluster here, to avoid conditions during read() /// (Anyway it will be calculated in the read()) - ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info); + const auto & select = query_info.query->as(); + auto syntax_analyzer_result = query_info.syntax_analyzer_result; + ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, select, syntax_analyzer_result); if (optimized_cluster) { LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", @@ -691,7 +696,11 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, if (remote_storage_id.hasDatabase()) resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id); - auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_snapshot->metadata->getColumns(), distributed_storage_snapshot->object_columns); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + + auto column_names_and_types = distributed_storage_snapshot->getColumns(get_column_options); + + auto storage = std::make_shared(resolved_remote_storage_id, ColumnsDescription{column_names_and_types}); auto table_node = std::make_shared(std::move(storage), query_context); if (table_expression_modifiers) @@ -1296,8 +1305,8 @@ ClusterPtr StorageDistributed::getCluster() const return owned_cluster ? 
owned_cluster : getContext()->getCluster(cluster_name); } -ClusterPtr StorageDistributed::getOptimizedCluster( - ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const +ClusterPtr StorageDistributed::getOptimizedCluster(ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, + const ASTSelectQuery & select, const TreeRewriterResultPtr & syntax_analyzer_result) const { ClusterPtr cluster = getCluster(); const Settings & settings = local_context->getSettingsRef(); @@ -1306,7 +1315,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster( if (has_sharding_key && sharding_key_is_usable) { - ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context); + ClusterPtr optimized = skipUnusedShards(cluster, select, syntax_analyzer_result, storage_snapshot, local_context); if (optimized) return optimized; } @@ -1355,16 +1364,16 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr` ClusterPtr StorageDistributed::skipUnusedShards( ClusterPtr cluster, - const SelectQueryInfo & query_info, + const ASTSelectQuery & select, + const TreeRewriterResultPtr & syntax_analyzer_result, const StorageSnapshotPtr & storage_snapshot, ContextPtr local_context) const { - const auto & select = query_info.query->as(); if (!select.prewhere() && !select.where()) return nullptr; /// FIXME: support analyzer - if (!query_info.syntax_analyzer_result) + if (!syntax_analyzer_result) return nullptr; ASTPtr condition_ast; @@ -1373,7 +1382,7 @@ ClusterPtr StorageDistributed::skipUnusedShards( { ASTPtr select_without_join_ptr = select.clone(); ASTSelectQuery select_without_join = select_without_join_ptr->as(); - TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result; + TreeRewriterResult analyzer_result_without_join = *syntax_analyzer_result; removeJoin(select_without_join, analyzer_result_without_join, local_context); if (!select_without_join.prewhere() && !select_without_join.where()) @@ -1432,7 +1441,7 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) return {}; } -void StorageDistributed::flush() +void StorageDistributed::flushAndPrepareForShutdown() { try { @@ -1459,9 +1468,18 @@ void StorageDistributed::flushClusterNodesAllData(ContextPtr local_context) directory_monitors.push_back(node.second.directory_monitor); } + bool need_flush = getDistributedSettingsRef().flush_on_detach; + if (!need_flush) + LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); + /// TODO: Maybe it should be executed in parallel for (auto & node : directory_monitors) - node->flushAllData(); + { + if (need_flush) + node->flushAllData(); + else + node->shutdownWithoutFlush(); + } } void StorageDistributed::rename(const String & new_path_to_table_data, const StorageID & new_table_id) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 615d6e337b6..b8445f5ec16 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -28,6 +28,9 @@ using DiskPtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +struct TreeRewriterResult; +using TreeRewriterResultPtr = std::shared_ptr; + /** A distributed table that resides on multiple servers. * Uses data from the specified database and tables on each server. 
* @@ -135,7 +138,7 @@ public: void initializeFromDisk(); void shutdown() override; - void flush() override; + void flushAndPrepareForShutdown() override; void drop() override; bool storesDataOnDisk() const override { return data_volume != nullptr; } @@ -182,10 +185,18 @@ private: /// Apply the following settings: /// - optimize_skip_unused_shards /// - force_optimize_skip_unused_shards - ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const; + ClusterPtr getOptimizedCluster( + ContextPtr local_context, + const StorageSnapshotPtr & storage_snapshot, + const ASTSelectQuery & select, + const TreeRewriterResultPtr & syntax_analyzer_result) const; ClusterPtr skipUnusedShards( - ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const; + ClusterPtr cluster, + const ASTSelectQuery & select, + const TreeRewriterResultPtr & syntax_analyzer_result, + const StorageSnapshotPtr & storage_snapshot, + ContextPtr context) const; /// This method returns optimal query processing stage. /// diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 9fabf1a9fb6..df03301b5e8 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -92,6 +92,8 @@ StorageExecutable::StorageExecutable( .command_termination_timeout_seconds = settings.command_termination_timeout, .command_read_timeout_milliseconds = settings.command_read_timeout, .command_write_timeout_milliseconds = settings.command_write_timeout, + .stderr_reaction = settings.stderr_reaction, + .check_exit_code = settings.check_exit_code, .pool_size = settings.pool_size, .max_command_execution_time_seconds = settings.max_command_execution_time, diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index f1c1c237393..239f1bb63ef 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -22,7 +22,7 @@ struct StorageID; * In 'columns' Nested data structures must be flattened. * You should subsequently call IStorage::startup method to work with table. */ -class StorageFactory : private boost::noncopyable, public IHints<1, StorageFactory> +class StorageFactory : private boost::noncopyable, public IHints<> { public: diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index cbd32460f7e..0e154803602 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -5,12 +5,14 @@ #include #include #include -#include +#include +#include #include #include #include +#include #include #include #include @@ -22,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -29,11 +33,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -56,12 +62,12 @@ #include #include - namespace ProfileEvents { extern const Event CreatedReadBufferOrdinary; extern const Event CreatedReadBufferMMap; extern const Event CreatedReadBufferMMapFailed; + extern const Event EngineFileLikeReadFiles; } namespace fs = std::filesystem; @@ -242,8 +248,8 @@ void listFilesWithRegexpMatchingImpl( { if (recursive) { - listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , - looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob , + listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "", + looking_for_directory ? 
suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob, total_bytes_to_read, result, recursive); } else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) @@ -381,7 +387,6 @@ std::unique_ptr createReadBuffer( ContextPtr context) { CompressionMethod method; - if (use_table_fd) method = chooseCompressionMethod("", compression_method); else @@ -389,14 +394,6 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); - /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. - if (context->getApplicationType() == Context::ApplicationType::LOCAL - || context->getApplicationType() == Context::ApplicationType::CLIENT) - { - auto & in = static_cast(*nested_buffer); - in.setProgressCallback(context); - } - int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max); } @@ -443,6 +440,275 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user return paths; } +namespace +{ + struct ReadBufferFromFileIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferFromFileIterator( + const std::vector & paths_, + const String & format_, + const String & compression_method_, + const std::optional & format_settings_, + ContextPtr context_) + : WithContext(context_) + , paths(paths_) + , format(format_) + , compression_method(compression_method_) + , format_settings(format_settings_) + { + } + + std::unique_ptr next() override + { + String path; + struct stat file_stat; + bool is_first = current_index == 0; + + do + { + if (current_index == paths.size()) + { + if (is_first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", + format); + return nullptr; + } + + path = paths[current_index++]; + file_stat = getFileStat(path, false, -1, "File"); + } while (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); + + return createReadBuffer(path, file_stat, false, -1, compression_method, getContext()); + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().use_cache_for_count_from_files) + return; + + auto key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); + StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + private: + const std::vector & paths; + + size_t current_index = 0; + String format; + String compression_method; + const std::optional & format_settings; + }; + + struct ReadBufferFromArchiveIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferFromArchiveIterator( + const StorageFile::ArchiveInfo & archive_info_, + const String & format_, + const std::optional & format_settings_, + ContextPtr context_) + : WithContext(context_) + , archive_info(archive_info_) + , format(format_) + , format_settings(format_settings_) + { + } + + std::unique_ptr next() override + { + std::unique_ptr read_buf; + while (true) + { + if (current_archive_index == archive_info.paths_to_archives.size()) + { + if (is_first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. 
You must specify table structure manually", + format); + + return nullptr; + } + + const auto & archive = archive_info.paths_to_archives[current_archive_index]; + struct stat file_stat; + file_stat = getFileStat(archive, false, -1, "File"); + if (file_stat.st_size == 0) + { + if (getContext()->getSettingsRef().engine_file_skip_empty_files) + { + ++current_archive_index; + continue; + } + + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because the archive {} is empty. " + "You must specify table structure manually", + format, + archive); + } + + auto archive_reader = createArchiveReader(archive); + + if (archive_info.isSingleFileRead()) + { + read_buf = archive_reader->readFile(archive_info.path_in_archive, false); + ++current_archive_index; + if (!read_buf) + continue; + + last_read_file_path = processed_files.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), archive_info.path_in_archive)); + columns_from_cache = tryGetColumnsFromSchemaCache(archive, last_read_file_path); + + if (columns_from_cache) + return nullptr; + } + else + { + if (last_read_buffer) + file_enumerator = archive_reader->nextFile(std::move(last_read_buffer)); + else + file_enumerator = archive_reader->firstFile(); + + if (!file_enumerator) + { + ++current_archive_index; + continue; + } + + const auto * filename = &file_enumerator->getFileName(); + while (!archive_info.filter(*filename)) + { + if (!file_enumerator->nextFile()) + { + archive_reader = nullptr; + break; + } + + filename = &file_enumerator->getFileName(); + } + + if (!archive_reader) + { + ++current_archive_index; + continue; + } + + last_read_file_path = processed_files.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), *filename)); + columns_from_cache = tryGetColumnsFromSchemaCache(archive, last_read_file_path); + + if (columns_from_cache) + return nullptr; + + read_buf = archive_reader->readFile(std::move(file_enumerator)); + } + + break; + } + + is_first = false; + return read_buf; + } + + std::optional getCachedColumns() override + { + return columns_from_cache; + } + + void setPreviousReadBuffer(std::unique_ptr buffer) override + { + last_read_buffer = std::move(buffer); + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().use_cache_for_count_from_files) + return; + + auto key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext()); + StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + std::vector processed_files; + private: + + std::optional tryGetColumnsFromSchemaCache(const std::string & archive_path, const std::string & full_path) + { + auto context = getContext(); + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file) + return std::nullopt; + + struct stat file_stat; + auto & schema_cache = StorageFile::getSchemaCache(context); + auto get_last_mod_time = [&]() -> std::optional + { + if (0 != stat(archive_path.c_str(), &file_stat)) + return std::nullopt; + + return file_stat.st_mtime; + }; + + auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + + if (columns) + return columns; + + return std::nullopt; + } + + const StorageFile::ArchiveInfo & archive_info; + + size_t current_archive_index = 0; + + bool is_first = true; + + std::string last_read_file_path; + + std::optional columns_from_cache; + + std::unique_ptr file_enumerator; 
+ std::unique_ptr last_read_buffer; + + String format; + const std::optional & format_settings; + }; + + std::optional tryGetColumnsFromCacheForArchives( + const StorageFile::ArchiveInfo & archive_info, + std::vector & paths_for_schema_cache, + const String & format, + const std::optional & format_settings, + const ContextPtr & context) + { + struct stat file_stat{}; + std::optional columns_from_cache; + + for (const auto & archive : archive_info.paths_to_archives) + { + const auto & full_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive, archive_info.path_in_archive)); + + auto & schema_cache = StorageFile::getSchemaCache(context); + auto get_last_mod_time = [&]() -> std::optional + { + if (0 != stat(archive.c_str(), &file_stat)) + return std::nullopt; + + return file_stat.st_mtime; + }; + + auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context); + columns_from_cache = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + } + + return columns_from_cache; + } +} + ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr context) { /// If we want to read schema from file descriptor we should create @@ -452,16 +718,13 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c /// in case of file descriptor we have a stream of data and we cannot /// start reading data from the beginning after reading some data for /// schema inference. - ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &) - { - /// We will use PeekableReadBuffer to create a checkpoint, so we need a place - /// where we can store the original read buffer. - auto file_stat = getFileStat("", true, table_fd, getName()); - read_buffer_from_fd = createReadBuffer("", file_stat, true, table_fd, compression_method, context); - auto read_buf = std::make_unique(*read_buffer_from_fd); - read_buf->setCheckpoint(); - return read_buf; - }; + auto file_stat = getFileStat("", true, table_fd, getName()); + /// We will use PeekableReadBuffer to create a checkpoint, so we need a place + /// where we can store the original read buffer. + read_buffer_from_fd = createReadBuffer("", file_stat, true, table_fd, compression_method, context); + auto read_buf = std::make_unique(*read_buffer_from_fd); + read_buf->setCheckpoint(); + auto read_buffer_iterator = SingleReadBufferIterator(std::move(read_buf)); auto columns = readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, false, context, peekable_read_buffer_from_fd); if (peekable_read_buffer_from_fd) @@ -478,7 +741,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile( const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context) + ContextPtr context, + const std::optional & archive_info) { if (format == "Distributed") { @@ -488,56 +752,60 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()); } - if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + if (((archive_info && archive_info->paths_to_archives.empty()) || (!archive_info && paths.empty())) + && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, because there are no files with provided path. 
" "You must specify table structure manually", format); - std::optional columns_from_cache; - if (context->getSettingsRef().schema_inference_use_cache_for_file) - columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); - - ReadBufferIterator read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr - { - String path; - struct stat file_stat; - do - { - if (it == paths.end()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); - return nullptr; - } - - path = *it++; - file_stat = getFileStat(path, false, -1, "File"); - } - while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); - - first = false; - return createReadBuffer(path, file_stat, false, -1, compression_method, context); - }; - ColumnsDescription columns; - if (columns_from_cache) - columns = *columns_from_cache; - else - columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); + std::vector archive_paths_for_schema_cache; + std::optional columns_from_cache; if (context->getSettingsRef().schema_inference_use_cache_for_file) - addColumnsToCache(paths, columns, format, format_settings, context); + { + if (archive_info) + columns_from_cache = tryGetColumnsFromCacheForArchives(*archive_info, archive_paths_for_schema_cache, format, format_settings, context); + else + columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); + } + + if (columns_from_cache) + { + columns = std::move(*columns_from_cache); + } + else + { + if (archive_info) + { + ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context); + columns = readSchemaFromFormat( + format, + format_settings, + read_buffer_iterator, + /*retry=*/archive_info->paths_to_archives.size() > 1 || !archive_info->isSingleFileRead(), + context); + + for (auto & file : read_buffer_iterator.processed_files) + archive_paths_for_schema_cache.push_back(std::move(file)); + } + else + { + ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context); + columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); + } + } + + if (context->getSettingsRef().schema_inference_use_cache_for_file) + addColumnsToCache(archive_info.has_value() ? 
archive_paths_for_schema_cache : paths, columns, format, format_settings, context); return columns; } -bool StorageFile::supportsSubsetOfColumns() const +bool StorageFile::supportsSubsetOfColumns(const ContextPtr & context) const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings); } bool StorageFile::prefersLargeBlocks() const @@ -573,8 +841,12 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args) StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args) : StorageFile(args) { + if (!args.path_to_archive.empty()) + archive_info = getArchiveInfo(args.path_to_archive, table_path_, user_files_path, args.getContext(), total_bytes_to_read); + else + paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); + is_db_table = false; - paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); is_path_with_globs = paths.size() > 1; if (!paths.empty()) path_for_partitioned_write = paths.front(); @@ -628,7 +900,13 @@ void StorageFile::setStorageMetadata(CommonArguments args) columns = getTableStructureFromFileDescriptor(args.getContext()); else { - columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext()); + columns = getTableStructureFromFile( + format_name, + paths, + compression_method, + format_settings, + args.getContext(), + archive_info); if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -640,6 +918,8 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } @@ -658,58 +938,91 @@ using StorageFilePtr = std::shared_ptr; class StorageFileSource : public ISource { public: - struct FilesInfo + class FilesIterator { + public: + explicit FilesIterator( + const Strings & files_, + std::optional archive_info_, + ASTPtr query, + const NamesAndTypesList & virtual_columns, + ContextPtr context_) + : files(files_), archive_info(std::move(archive_info_)) + { + ASTPtr filter_ast; + if (!archive_info && !files.empty() && !files[0].empty()) + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, files[0], context_); + + if (filter_ast) + VirtualColumnUtils::filterByPathOrFile(files, files, query, virtual_columns, context_, filter_ast); + } + + String next() + { + const auto & fs = isReadFromArchive() ? 
archive_info->paths_to_archives : files; + + auto current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= fs.size()) + return ""; + + return fs[current_index]; + } + + bool isReadFromArchive() const + { + return archive_info.has_value(); + } + + bool validFileInArchive(const std::string & path) const + { + return archive_info->filter(path); + } + + bool isSingleFileReadFromArchive() const + { + return archive_info->isSingleFileRead(); + } + + const String & getFileNameInArchive() + { + if (archive_info->path_in_archive.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but it's empty"); + + return archive_info->path_in_archive; + } + private: std::vector files; - std::atomic next_file_to_read = 0; + std::optional archive_info; - bool need_path_column = false; - bool need_file_column = false; - - size_t total_bytes_to_read = 0; + std::atomic index = 0; }; - using FilesInfoPtr = std::shared_ptr; - - static Block getBlockForSource(const Block & block_for_format, const FilesInfoPtr & files_info) - { - auto res = block_for_format; - if (files_info->need_path_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - } - if (files_info->need_file_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); - } - return res; - } + using FilesIteratorPtr = std::shared_ptr; StorageFileSource( + const ReadFromFormatInfo & info, std::shared_ptr storage_, const StorageSnapshotPtr & storage_snapshot_, ContextPtr context_, + const SelectQueryInfo & query_info_, UInt64 max_block_size_, - FilesInfoPtr files_info_, - ColumnsDescription columns_description_, - const Block & block_for_format_, - std::unique_ptr read_buf_) - : ISource(getBlockForSource(block_for_format_, files_info_)) + FilesIteratorPtr files_iterator_, + std::unique_ptr read_buf_, + bool need_only_count_) + : ISource(info.source_header, false) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) - , files_info(std::move(files_info_)) + , files_iterator(std::move(files_iterator_)) , read_buf(std::move(read_buf_)) - , columns_description(std::move(columns_description_)) - , block_for_format(block_for_format_) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) , context(context_) + , query_info(query_info_) , max_block_size(max_block_size_) + , need_only_count(need_only_count_) { if (!storage->use_table_fd) { @@ -784,6 +1097,32 @@ public: return storage->getName(); } + bool tryGetCountFromCache(const struct stat & file_stat) + { + if (!context->getSettingsRef().use_cache_for_count_from_files) + return false; + + auto num_rows_from_cache = tryGetNumRowsFromCache(current_path, file_stat.st_mtime); + if (!num_rows_from_cache) + return false; + + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
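The comment above explains why a cached row count is not returned as one huge chunk: the code that follows builds a ConstChunkGenerator-style source that emits blocks of at most `max_block_size` rows until the cached total is reached. A standalone sketch of that slicing behaviour, using plain row counts instead of real Chunk objects:

```
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <optional>

// Emits chunk sizes of at most max_block_size until total_rows is exhausted,
// which keeps peak memory bounded even for very large cached counts.
class ConstChunkSizeGenerator
{
public:
    ConstChunkSizeGenerator(size_t total_rows_, size_t max_block_size_)
        : total_rows(total_rows_), max_block_size(max_block_size_) {}

    std::optional<size_t> next()
    {
        if (generated >= total_rows)
            return std::nullopt;
        size_t rows = std::min(max_block_size, total_rows - generated);
        generated += rows;
        return rows;
    }

private:
    size_t total_rows;
    size_t max_block_size;
    size_t generated = 0;
};

int main()
{
    ConstChunkSizeGenerator gen(/*total_rows=*/250000, /*max_block_size=*/65536);
    while (auto rows = gen.next())
        std::cout << "chunk with " << *rows << " rows\n";
}
```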
+ auto const_chunk_generator = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); + QueryPipelineBuilder builder; + builder.init(Pipe(const_chunk_generator)); + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); + return true; + } + Chunk generate() override { while (!finished_generate) @@ -793,11 +1132,86 @@ public: { if (!storage->use_table_fd) { - auto current_file = files_info->next_file_to_read.fetch_add(1); - if (current_file >= files_info->files.size()) - return {}; + if (files_iterator->isReadFromArchive()) + { + if (files_iterator->isSingleFileReadFromArchive()) + { + auto archive = files_iterator->next(); + if (archive.empty()) + return {}; - current_path = files_info->files[current_file]; + auto file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); + if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + continue; + + archive_reader = createArchiveReader(archive); + filename_override = files_iterator->getFileNameInArchive(); + + current_path = fmt::format("{}::{}", archive_reader->getPath(), *filename_override); + if (need_only_count && tryGetCountFromCache(file_stat)) + continue; + + read_buf = archive_reader->readFile(*filename_override, /*throw_on_not_found=*/false); + if (!read_buf) + continue; + + if (auto progress_callback = context->getFileProgressCallback()) + progress_callback(FileProgress(0, tryGetFileSizeFromReadBuffer(*read_buf).value_or(0))); + } + else + { + while (true) + { + if (file_enumerator == nullptr) + { + auto archive = files_iterator->next(); + if (archive.empty()) + return {}; + + current_archive_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); + if (context->getSettingsRef().engine_file_skip_empty_files && current_archive_stat.st_size == 0) + continue; + + archive_reader = createArchiveReader(archive); + file_enumerator = archive_reader->firstFile(); + continue; + } + + bool file_found = true; + while (!files_iterator->validFileInArchive(file_enumerator->getFileName())) + { + if (!file_enumerator->nextFile()) + { + file_found = false; + break; + } + } + + if (file_found) + { + filename_override = file_enumerator->getFileName(); + break; + } + + file_enumerator = nullptr; + } + + chassert(file_enumerator); + current_path = fmt::format("{}::{}", archive_reader->getPath(), *filename_override); + if (need_only_count && tryGetCountFromCache(current_archive_stat)) + continue; + + read_buf = archive_reader->readFile(std::move(file_enumerator)); + if (auto progress_callback = context->getFileProgressCallback()) + progress_callback(FileProgress(0, tryGetFileSizeFromReadBuffer(*read_buf).value_or(0))); + } + } + else + { + current_path = files_iterator->next(); + if (current_path.empty()) + return {}; + } /// Special case for distributed format. Defaults are not needed here. 
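The `generate()` changes above handle two archive modes: reading one named file per archive, or enumerating the archive's entries and keeping only the names accepted by the glob filter, composing `archive::entry` as the current path. A simplified standalone walk of that selection, assuming an archive is just a path plus a list of entry names and the filter is any predicate:

```
#include <functional>
#include <iostream>
#include <string>
#include <vector>

using NameFilter = std::function<bool(const std::string &)>;

// Hypothetical flattened view of one archive: its path plus the entry names inside it.
struct Archive
{
    std::string path;
    std::vector<std::string> entries;
};

// Prints "archive::entry" for every entry that passes the filter,
// mirroring how current_path is composed for archive reads.
void listSelectedEntries(const std::vector<Archive> & archives, const NameFilter & filter)
{
    for (const auto & archive : archives)
        for (const auto & entry : archive.entries)
            if (filter(entry))
                std::cout << archive.path << "::" << entry << '\n';
}

int main()
{
    std::vector<Archive> archives{{"data1.zip", {"a.csv", "b.json"}}, {"data2.zip", {"c.csv"}}};
    // Predicate corresponding to the glob "*.csv".
    listSelectedEntries(archives, [](const std::string & name)
    {
        return name.size() >= 4 && name.substr(name.size() - 4) == ".csv";
    });
}
```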
if (storage->format_name == "Distributed") @@ -810,22 +1224,33 @@ public: if (!read_buf) { - auto file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); + struct stat file_stat; + file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); + if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; + + if (need_only_count && tryGetCountFromCache(file_stat)) + continue; + read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); } - size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + file_size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += file_size; - const Settings & settings = context->getSettingsRef(); - chassert(!storage->paths.empty()); - const auto max_parsing_threads = std::max(settings.max_threads/ storage->paths.size(), 1UL); - input_format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings, max_parsing_threads); + + size_t file_num = 0; + if (storage->archive_info) + file_num = storage->archive_info->paths_to_archives.size(); + else + file_num = storage->paths.size(); + + chassert(file_num > 0); + + const auto max_parsing_threads = std::max(settings.max_threads / file_num, 1UL); + input_format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings, need_only_count ? 1 : max_parsing_threads); + input_format->setQueryInfo(query_info, context); + if (need_only_count) + input_format->needOnlyCount(); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -838,40 +1263,32 @@ public: }); } - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); } Chunk chunk; if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + total_rows_in_file += num_rows; + size_t chunk_size = 0; + if (input_format && storage->format_name != "Distributed") + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. 
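The hunk above also changes how parsing threads are allocated: the global `max_threads` budget is divided by the number of files (or archives) being read, with a floor of one thread per file, and count-only reads use a single parsing thread. A small sketch of that arithmetic, with a guard instead of the assertion used in the hunk:

```
#include <algorithm>
#include <cstddef>
#include <iostream>

// Splits the global thread budget across the files read in parallel;
// every file still gets at least one parsing thread, and count-only reads need just one.
size_t parsingThreadsPerFile(size_t max_threads, size_t file_num, bool need_only_count)
{
    if (need_only_count)
        return 1;
    return std::max<size_t>(max_threads / std::max<size_t>(file_num, 1), 1);
}

int main()
{
    std::cout << parsingThreadsPerFile(16, 4, false) << '\n';   // 4 threads per file
    std::cout << parsingThreadsPerFile(16, 100, false) << '\n'; // 1 thread per file
    std::cout << parsingThreadsPerFile(16, 4, true) << '\n';    // 1, counting only
}
```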
- if (files_info->need_path_column) - { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - - if (files_info->need_file_column) - { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk( + chunk, requested_virtual_columns, current_path, filename_override.has_value() ? &filename_override.value() : nullptr); return chunk; } @@ -879,10 +1296,29 @@ public: if (storage->use_table_fd) finished_generate = true; + if (input_format && storage->format_name != "Distributed" && context->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(current_path, total_rows_in_file); + + total_rows_in_file = 0; + /// Close file prematurely if stream was ended. reader.reset(); pipeline.reset(); input_format.reset(); + + if (files_iterator->isReadFromArchive() && !files_iterator->isSingleFileReadFromArchive()) + { + if (file_enumerator) + { + if (!file_enumerator->nextFile()) + file_enumerator = nullptr; + } + else + { + file_enumerator = archive_reader->nextFile(std::move(read_buf)); + } + } + read_buf.reset(); } @@ -891,38 +1327,60 @@ public: private: + void addNumRowsToCache(const String & path, size_t num_rows) + { + auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, context); + StorageFile::getSchemaCache(context).addNumRows(key, num_rows); + } + + std::optional tryGetNumRowsFromCache(const String & path, time_t last_mod_time) + { + auto & schema_cache = StorageFile::getSchemaCache(context); + auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, context); + auto get_last_mod_time = [&]() -> std::optional + { + return last_mod_time; + }; + + return schema_cache.tryGetNumRows(key, get_last_mod_time); + } + std::shared_ptr storage; StorageSnapshotPtr storage_snapshot; - FilesInfoPtr files_info; + FilesIteratorPtr files_iterator; String current_path; + struct stat current_archive_stat; + std::optional filename_override; Block sample_block; std::unique_ptr read_buf; InputFormatPtr input_format; std::unique_ptr pipeline; std::unique_ptr reader; + std::shared_ptr archive_reader; + std::unique_ptr file_enumerator = nullptr; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; Block block_for_format; ContextPtr context; /// TODO Untangle potential issues with context lifetime. 
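`addNumRowsToCache` / `tryGetNumRowsFromCache` above key the cached row count by file path and validate it against the file's modification time, so a rewritten file never reuses a stale count. A minimal standalone sketch of that idea, using an in-memory map as a stand-in for the real schema cache:

```
#include <ctime>
#include <iostream>
#include <map>
#include <optional>
#include <string>

// Cached value plus the modification time it was computed for.
struct CachedCount
{
    size_t num_rows;
    std::time_t mtime;
};

class NumRowsCache
{
public:
    void add(const std::string & path, size_t num_rows, std::time_t mtime)
    {
        cache[path] = {num_rows, mtime};
    }

    // Returns the cached count only if the file has not been modified since it was cached.
    std::optional<size_t> tryGet(const std::string & path, std::time_t current_mtime) const
    {
        auto it = cache.find(path);
        if (it == cache.end() || it->second.mtime != current_mtime)
            return std::nullopt;
        return it->second.num_rows;
    }

private:
    std::map<std::string, CachedCount> cache;
};

int main()
{
    NumRowsCache cache;
    cache.add("/data/events.csv", 1'000'000, /*mtime=*/1700000000);
    std::cout << cache.tryGet("/data/events.csv", 1700000000).value_or(0) << '\n'; // hit
    std::cout << cache.tryGet("/data/events.csv", 1700000999).value_or(0) << '\n'; // miss: file changed
}
```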
+ SelectQueryInfo query_info; UInt64 max_block_size; bool finished_generate = false; + bool need_only_count = false; + size_t total_rows_in_file = 0; std::shared_lock shared_lock; - - UInt64 total_rows_approx_accumulated = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_max = 0; - - size_t total_files_size = 0; }; Pipe StorageFile::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -934,32 +1392,37 @@ Pipe StorageFile::read( } else { - if (paths.size() == 1 && !fs::exists(paths[0])) + const std::vector * p; + + if (archive_info.has_value()) + p = &archive_info->paths_to_archives; + else + p = &paths; + + if (p->size() == 1 && !fs::exists(p->at(0))) { if (context->getSettingsRef().engine_file_empty_if_not_exists) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); else - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p->at(0)); } } - auto files_info = std::make_shared(); - files_info->files = paths; - files_info->total_bytes_to_read = total_bytes_to_read; - - for (const auto & column : column_names) - { - if (column == "_path") - files_info->need_path_column = true; - if (column == "_file") - files_info->need_file_column = true; - } + auto files_iterator + = std::make_shared(paths, archive_info, query_info.query, virtual_columns, context); auto this_ptr = std::static_pointer_cast(shared_from_this()); size_t num_streams = max_num_streams; - if (max_num_streams > paths.size()) - num_streams = paths.size(); + + size_t files_to_read = 0; + if (archive_info) + files_to_read = archive_info->paths_to_archives.size(); + else + files_to_read = paths.size(); + + if (max_num_streams > files_to_read) + num_streams = files_to_read; Pipes pipes; pipes.reserve(num_streams); @@ -967,36 +1430,15 @@ Pipe StorageFile::read( /// Set total number of bytes to process. For progress bar. 
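`StorageFile::read` above builds one `FilesIterator` and shares it across the parallel sources; as the earlier part of this hunk shows, `next()` claims the next path with an atomic `fetch_add` and returns an empty string once every file has been handed out, so no mutex is needed. A standalone sketch of that iterator over a plain vector of paths:

```
#include <atomic>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

// Hands out each path exactly once, even when next() is called from several threads.
class FilesIterator
{
public:
    explicit FilesIterator(std::vector<std::string> files_) : files(std::move(files_)) {}

    // Returns an empty string when all files have been claimed.
    std::string next()
    {
        size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
        if (current_index >= files.size())
            return "";
        return files[current_index];
    }

private:
    std::vector<std::string> files;
    std::atomic<size_t> index{0};
};

int main()
{
    FilesIterator it({"a.csv", "b.csv", "c.csv", "d.csv"});
    auto worker = [&it](int id)
    {
        for (std::string path = it.next(); !path.empty(); path = it.next())
            std::cout << "stream " << id << " reads " << path << '\n';
    };
    std::thread t1(worker, 1), t2(worker, 2);
    t1.join();
    t2.join();
}
```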
auto progress_callback = context->getFileProgressCallback(); - if (progress_callback) + if (progress_callback && !archive_info) progress_callback(FileProgress(0, total_bytes_to_read)); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals()); + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && context->getSettingsRef().optimize_count_from_files; + for (size_t i = 0; i < num_streams; ++i) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { - return std::any_of( - virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col) { return col == virtual_col.name; }); - }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - } - - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - /// In case of reading from fd we have to check whether we have already created /// the read buffer from it in Storage constructor (for schema inference) or not. /// If yes, then we should use it in StorageFileSource. Atomic bool flag is needed @@ -1006,14 +1448,15 @@ Pipe StorageFile::read( read_buffer = std::move(peekable_read_buffer_from_fd); pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, storage_snapshot, context, + query_info, max_block_size, - files_info, - columns_description, - block_for_format, - std::move(read_buffer))); + files_iterator, + std::move(read_buffer), + need_only_count)); } return Pipe::unitePipes(std::move(pipes)); @@ -1263,6 +1706,9 @@ SinkToStoragePtr StorageFile::write( ContextPtr context, bool /*async_insert*/) { + if (!use_table_fd && archive_info.has_value()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported"); + if (format_name == "Distributed") throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format"); @@ -1436,6 +1882,7 @@ void registerStorageFile(StorageFactory & factory) factory_args.constraints, factory_args.comment, {}, + {}, }; ASTs & engine_args_ast = factory_args.engine_args; @@ -1506,7 +1953,7 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::String) - source_path = literal->value.get(); + StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } @@ -1527,14 +1974,6 @@ void registerStorageFile(StorageFactory & factory) storage_features); } - -NamesAndTypesList StorageFile::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - SchemaCache & StorageFile::getSchemaCache(const ContextPtr & context) { static SchemaCache schema_cache(context->getConfigRef().getUInt("schema_inference_cache_max_elements_for_file", DEFAULT_SCHEMA_CACHE_ELEMENTS)); @@ 
-1558,7 +1997,7 @@ std::optional StorageFile::tryGetColumnsFromCache( }; auto cache_key = getKeyForSchemaCache(path, format_name, format_settings, context); - auto columns = schema_cache.tryGet(cache_key, get_last_mod_time); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); if (columns) return columns; } @@ -1575,7 +2014,65 @@ void StorageFile::addColumnsToCache( { auto & schema_cache = getSchemaCache(context); auto cache_keys = getKeysForSchemaCache(paths, format_name, format_settings, context); - schema_cache.addMany(cache_keys, columns); + schema_cache.addManyColumns(cache_keys, columns); +} + +void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive) +{ + size_t pos = source.find("::"); + if (pos == String::npos) + { + filename = std::move(source); + return; + } + + std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); + while (path_to_archive_view.back() == ' ') + path_to_archive_view.remove_suffix(1); + + if (path_to_archive_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + + path_to_archive = path_to_archive_view; + + std::string_view filename_view = std::string_view{source}.substr(pos + 2); + while (filename_view.front() == ' ') + filename_view.remove_prefix(1); + + if (filename_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + + filename = filename_view; +} + +StorageFile::ArchiveInfo StorageFile::getArchiveInfo( + const std::string & path_to_archive, + const std::string & file_in_archive, + const std::string & user_files_path, + ContextPtr context, + size_t & total_bytes_to_read +) +{ + ArchiveInfo archive_info; + archive_info.path_in_archive = file_in_archive; + + if (file_in_archive.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(file_in_archive)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", file_in_archive, matcher->error()); + + archive_info.filter = [matcher, matcher_mutex = std::make_shared()](const std::string & p) mutable + { + std::lock_guard lock(*matcher_mutex); + return re2::RE2::FullMatch(p, *matcher); + }; + } + + archive_info.paths_to_archives = getPathsList(path_to_archive, user_files_path, context, total_bytes_to_read); + + return archive_info; } } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed50ae73e51..f1464b90ab4 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -1,8 +1,9 @@ #pragma once -#include #include +#include #include +#include #include #include @@ -22,8 +23,8 @@ public: const ColumnsDescription & columns; const ConstraintsDescription & constraints; const String & comment; - const std::string rename_after_processing; + std::string path_to_archive; }; /// From file descriptor @@ -65,7 +66,7 @@ public: bool storesDataOnDisk() const override; Strings getDataPaths() const override; - NamesAndTypesList getVirtuals() const override; + NamesAndTypesList getVirtuals() const override { return virtual_columns; } static Strings getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read); @@ -73,7 +74,9 @@ public: /// Is is useful because such formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
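`StorageFile::parseFileSource` above splits a table argument of the form `path-to-archive :: path-in-archive`, trimming spaces around `::` and rejecting empty halves; a source without `::` is a plain filename. A standalone re-implementation kept close to the code in the hunk (with explicit emptiness guards added so the trimming loops stay well defined):

```
#include <iostream>
#include <stdexcept>
#include <string>
#include <string_view>

// Splits "archive.zip :: file.csv" into its two parts; a source without "::" is a plain filename.
void parseFileSource(std::string source, std::string & filename, std::string & path_to_archive)
{
    size_t pos = source.find("::");
    if (pos == std::string::npos)
    {
        filename = std::move(source);
        return;
    }

    std::string_view archive_view = std::string_view{source}.substr(0, pos);
    while (!archive_view.empty() && archive_view.back() == ' ')
        archive_view.remove_suffix(1);
    if (archive_view.empty())
        throw std::invalid_argument("Path to archive is empty");

    std::string_view filename_view = std::string_view{source}.substr(pos + 2);
    while (!filename_view.empty() && filename_view.front() == ' ')
        filename_view.remove_prefix(1);
    if (filename_view.empty())
        throw std::invalid_argument("Filename is empty");

    path_to_archive = std::string(archive_view);
    filename = std::string(filename_view);
}

int main()
{
    std::string filename, archive;
    parseFileSource("backups/data.zip :: events.csv", filename, archive);
    std::cout << archive << " -> " << filename << '\n'; // backups/data.zip -> events.csv
}
```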
- bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool supportsSubcolumns() const override { return true; } bool prefersLargeBlocks() const override; @@ -81,6 +84,18 @@ public: bool supportsPartitionBy() const override { return true; } + struct ArchiveInfo + { + std::vector paths_to_archives; + std::string path_in_archive; // used when reading a single file from archive + IArchiveReader::NameFilter filter = {}; // used when files inside archive are defined with a glob + + bool isSingleFileRead() const + { + return !filter; + } + }; + ColumnsDescription getTableStructureFromFileDescriptor(ContextPtr context); static ColumnsDescription getTableStructureFromFile( @@ -88,10 +103,22 @@ public: const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context); + ContextPtr context, + const std::optional & archive_info = std::nullopt); static SchemaCache & getSchemaCache(const ContextPtr & context); + static void parseFileSource(String source, String & filename, String & path_to_archive); + + static ArchiveInfo getArchiveInfo( + const std::string & path_to_archive, + const std::string & file_in_archive, + const std::string & user_files_path, + ContextPtr context, + size_t & total_bytes_to_read); + + bool supportsTrivialCountOptimization() const override { return true; } + protected: friend class StorageFileSource; friend class StorageFileSink; @@ -122,6 +149,8 @@ private: std::string base_path; std::vector paths; + std::optional archive_info; + bool is_db_table = true; /// Table is stored in real database, not user's file bool use_table_fd = false; /// Use table_fd instead of path @@ -146,6 +175,8 @@ private: std::atomic readers_counter = 0; FileRenamer file_renamer; bool was_renamed = false; + + NamesAndTypesList virtual_columns; }; } diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 293beca9c24..8d9b8f5d8d0 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -360,7 +360,7 @@ ColumnPtr fillColumnWithRandomData( auto column = ColumnUUID::create(); column->getData().resize(limit); /// NOTE This is slightly incorrect as random UUIDs should have fixed version 4. 
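The NOTE above points out that purely random bytes are not valid version-4 UUIDs; the extra boolean passed to `fillBufferWithRandomData` in this hunk presumably addresses that. In RFC 4122 terms the fix is to force the 4-bit version field to 4 and the 2-bit variant field to `10`; a standalone sketch operating on a raw 16-byte buffer rather than ClickHouse's UUID type:

```
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <random>

// Turns 16 random bytes into a valid RFC 4122 version-4 UUID
// by forcing the version and variant bits.
void makeVersion4(std::array<uint8_t, 16> & bytes)
{
    bytes[6] = static_cast<uint8_t>((bytes[6] & 0x0F) | 0x40); // version = 4
    bytes[8] = static_cast<uint8_t>((bytes[8] & 0x3F) | 0x80); // variant = 10xxxxxx
}

int main()
{
    std::array<uint8_t, 16> bytes{};
    std::mt19937_64 rng{std::random_device{}()};
    for (auto & b : bytes)
        b = static_cast<uint8_t>(rng());

    makeVersion4(bytes);

    for (size_t i = 0; i < bytes.size(); ++i)
        std::printf("%02x%s", static_cast<unsigned>(bytes[i]), (i == 3 || i == 5 || i == 7 || i == 9) ? "-" : "");
    std::printf("\n");
}
```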
- fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit, sizeof(UUID), rng); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit, sizeof(UUID), rng, true); return column; } case TypeIndex::Int8: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index afe75349864..af285a953dc 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -239,7 +239,7 @@ bool StorageInMemoryMetadata::hasAnyGroupByTTL() const ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( const NameSet & updated_columns, bool include_ttl_target, - const std::function & has_indice_or_projection) const + const HasDependencyCallback & has_dependency) const { if (updated_columns.empty()) return {}; @@ -268,13 +268,13 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & index : getSecondaryIndices()) { - if (has_indice_or_projection("skp_idx_" + index.name + ".idx") || has_indice_or_projection("skp_idx_" + index.name + ".idx2")) + if (has_dependency(index.name, ColumnDependency::SKIP_INDEX)) add_dependent_columns(index.expression, indices_columns); } for (const auto & projection : getProjections()) { - if (has_indice_or_projection(projection.getDirectoryName())) + if (has_dependency(projection.name, ColumnDependency::PROJECTION)) add_dependent_columns(&projection, projections_columns); } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 4ed7eb8bf29..30b2b303492 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -147,12 +147,14 @@ struct StorageInMemoryMetadata TTLDescriptions getGroupByTTLs() const; bool hasAnyGroupByTTL() const; + using HasDependencyCallback = std::function; + /// Returns columns, which will be needed to calculate dependencies (skip indices, projections, /// TTL expressions) if we update @updated_columns set of columns. ColumnDependencies getColumnDependencies( const NameSet & updated_columns, bool include_ttl_target, - const std::function & has_indice_or_projection) const; + const HasDependencyCallback & has_dependency) const; /// Block with ordinary + materialized columns. Block getSampleBlock() const; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 640706aae17..121d859a3f2 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) } } -HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const +HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context, const Names & required_columns_names) const { auto metadata_snapshot = getInMemoryMetadataPtr(); if (!analyzed_join->sameStrictnessAndKind(strictness, kind)) @@ -237,8 +238,10 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, /// Qualifies will be added by join implementation (TableJoin contains a rename mapping). 
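The `getColumnDependencies` change above replaces the filename-based `has_indice_or_projection` check with a typed `HasDependencyCallback` that receives the index or projection name together with its kind. A minimal standalone sketch of driving such a callback; the enum values and names here are stand-ins, not the real `ColumnDependency` kinds:

```
#include <functional>
#include <iostream>
#include <string>
#include <vector>

enum class DependencyKind { SkipIndex, Projection };

using HasDependencyCallback = std::function<bool(const std::string & name, DependencyKind kind)>;

// Collects the secondary indexes and projections the caller reports as materialized,
// mirroring how only existing dependencies need to be rebuilt on mutation.
std::vector<std::string> collectDependencies(
    const std::vector<std::string> & index_names,
    const std::vector<std::string> & projection_names,
    const HasDependencyCallback & has_dependency)
{
    std::vector<std::string> result;
    for (const auto & index : index_names)
        if (has_dependency(index, DependencyKind::SkipIndex))
            result.push_back("skip index " + index);
    for (const auto & projection : projection_names)
        if (has_dependency(projection, DependencyKind::Projection))
            result.push_back("projection " + projection);
    return result;
}

int main()
{
    auto has_dependency = [](const std::string & name, DependencyKind) { return name != "unmaterialized"; };
    for (const auto & dep : collectDependencies({"idx_a", "unmaterialized"}, {"proj_daily"}, has_dependency))
        std::cout << dep << '\n';
}
```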
analyzed_join->setRightKeys(key_names); analyzed_join->setLeftKeys(left_key_names_resorted); - - HashJoinPtr join_clone = std::make_shared(analyzed_join, getRightSampleBlock()); + Block right_sample_block; + for (const auto & name : required_columns_names) + right_sample_block.insert(getRightSampleBlock().getByName(name)); + HashJoinPtr join_clone = std::make_shared(analyzed_join, right_sample_block); RWLockImpl::LockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); join_clone->setLock(holder); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 5559b5d1ec8..4626d744a38 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -49,7 +49,7 @@ public: /// Return instance of HashJoin holding lock that protects from insertions to StorageJoin. /// HashJoin relies on structure of hash table that's why we need to return it with locked mutex. - HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const; + HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context, const Names & required_columns_names) const; /// Get result type for function "joinGet(OrNull)" DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f2b1b907832..f98728c012e 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -419,7 +419,7 @@ StorageKeeperMap::StorageKeeperMap( } else if (code != Coordination::Error::ZOK) { - throw Coordination::Exception(code, dropped_lock_path); + throw Coordination::Exception::fromPath(code, dropped_lock_path); } else { @@ -918,7 +918,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca { auto code = client->tryRemove(delete_request->getPath()); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw zkutil::KeeperException(code, delete_request->getPath()); + throw zkutil::KeeperException::fromPath(code, delete_request->getPath()); } } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index d8065b8bb3c..90cf55e53b2 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -777,7 +777,7 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr num_marks_saved = 0; total_rows = 0; total_bytes = 0; - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); } @@ -946,9 +946,10 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c fs::path data_path_in_backup_fs = data_path_in_backup; auto temp_dir_owner = std::make_shared(disk, "tmp/"); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// *.bin @@ -980,7 +981,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt 
backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 08fbb61960f..2cd589bfd75 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -41,6 +41,13 @@ public: void drop() override { nested_storage->drop(); } + bool supportsTrivialCountOptimization() const override { return false; } + + IndexSizeByName getSecondaryIndexSizes() const override + { + return nested_storage->getSecondaryIndexSizes(); + } + private: [[noreturn]] static void throwNotAllowed() { diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 16d724d54d8..7354dd56552 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -228,10 +228,13 @@ void StorageMaterializedView::dropInnerTableIfAny(bool sync, ContextPtr local_co { /// We will use `sync` argument wneh this function is called from a DROP query /// and will ignore database_atomic_wait_for_drop_and_detach_synchronously when it's called from drop task. - /// See the comment in StorageMaterializedView::drop + /// See the comment in StorageMaterializedView::drop. + /// DDL queries with StorageMaterializedView are fundamentally broken. + /// Best-effort to make them work: the inner table name is almost always less than the MV name (so it's safe to lock DDLGuard) + bool may_lock_ddl_guard = getStorageID().getQualifiedName() < target_table_id.getQualifiedName(); if (has_inner_table && tryGetTargetTable()) InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target_table_id, - sync, /* ignore_sync_setting */ true); + sync, /* ignore_sync_setting */ true, may_lock_ddl_guard); } void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 6ed0583bd44..2ef1d8d3183 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -277,11 +277,13 @@ namespace const std::shared_ptr blocks_, const String & data_path_in_backup, const DiskPtr & temp_disk_, + const ReadSettings & read_settings_, UInt64 max_compress_block_size_) : context(context_) , metadata_snapshot(metadata_snapshot_) , blocks(blocks_) , temp_disk(temp_disk_) + , read_settings(read_settings_) , max_compress_block_size(max_compress_block_size_) { fs::path data_path_in_backup_fs = data_path_in_backup; @@ -314,7 +316,7 @@ namespace backup_entries.resize(file_paths.size()); temp_dir_owner.emplace(temp_disk); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); temp_disk->createDirectories(temp_dir); /// Writing data.bin @@ -371,7 +373,7 @@ namespace file_checker.update(temp_dir / fs::path{file_paths[i]}.filename()); } file_checker.save(); - backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path)}; + backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path, read_settings)}; } /// We don't need to keep `blocks` any longer. 
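Several hunks above switch the Keeper-related throw sites from constructing the exception directly with a code and path to a `fromPath` factory. A generic sketch of that pattern, assuming nothing about the real Coordination classes beyond what the call sites show: a static helper formats the path into the message once instead of every caller doing it:

```
#include <iostream>
#include <stdexcept>
#include <string>

enum class Error { ZOK, ZNONODE, ZNODEEXISTS };

// Hypothetical stand-in for a coordination exception with a named factory for path-related errors.
class CoordinationError : public std::runtime_error
{
public:
    static CoordinationError fromPath(Error code, const std::string & path)
    {
        return CoordinationError("Coordination error " + std::to_string(static_cast<int>(code)) + " for path: " + path, code);
    }

    Error code;

private:
    CoordinationError(const std::string & msg, Error code_) : std::runtime_error(msg), code(code_) {}
};

int main()
{
    try
    {
        throw CoordinationError::fromPath(Error::ZNODEEXISTS, "/clickhouse/tables/t/dropped");
    }
    catch (const CoordinationError & e)
    {
        std::cout << e.what() << '\n';
    }
}
```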
@@ -386,6 +388,7 @@ namespace std::shared_ptr blocks; DiskPtr temp_disk; std::optional temp_dir_owner; + ReadSettings read_settings; UInt64 max_compress_block_size; Strings file_paths; size_t data_bin_pos, index_mrk_pos, columns_txt_pos, count_txt_pos, sizes_json_pos; @@ -395,13 +398,16 @@ namespace void StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { auto temp_disk = backup_entries_collector.getContext()->getGlobalTemporaryVolume()->getDisk(0); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef().max_compress_block_size; + backup_entries_collector.addBackupEntries(std::make_shared( backup_entries_collector.getContext(), getInMemoryMetadataPtr(), data.get(), data_path_in_backup, temp_disk, + read_settings, max_compress_block_size)->getBackupEntries()); } @@ -453,10 +459,10 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat if (!dynamic_cast(in.get())) { temp_data_file.emplace(temporary_disk); - auto out = std::make_unique(temp_data_file->getPath()); + auto out = std::make_unique(temp_data_file->getAbsolutePath()); copyData(*in, *out); out.reset(); - in = createReadBufferFromFileBase(temp_data_file->getPath(), {}); + in = createReadBufferFromFileBase(temp_data_file->getAbsolutePath(), {}); } std::unique_ptr in_from_file{static_cast(in.release())}; CompressedReadBufferFromFile compressed_in{std::move(in_from_file)}; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b0ed242d14d..272f35303bd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -795,6 +795,10 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( bool filter_by_database_virtual_column /* = false */, bool filter_by_table_virtual_column /* = false */) const { + /// FIXME: filtering does not work with allow_experimental_analyzer due to + /// different column names there (it has "table_name._table" not just + /// "_table") + assert(!filter_by_database_virtual_column || !filter_by_table_virtual_column || query); const Settings & settings = query_context->getSettingsRef(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 32e100edc4d..bae91ec8bb1 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -220,7 +219,8 @@ void StorageMergeTree::read( local_context, query_info.query, table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas; + auto cluster = local_context->getCluster(cluster_for_parallel_replicas); Block header; @@ -237,9 +237,13 @@ void StorageMergeTree::read( processed_stage); ClusterProxy::executeQueryWithParallelReplicas( - query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, - select_stream_factory, modified_query_ast, - local_context, query_info, cluster); + query_plan, + getStorageID(), + select_stream_factory, + modified_query_ast, + local_context, + query_info.storage_limits, + cluster); } else { @@ -287,7 +291,7 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met 
*this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context); } -void StorageMergeTree::checkTableCanBeDropped() const +void StorageMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { if (!supportsReplication() && isStaticStorage()) return; @@ -332,6 +336,11 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); } + else if (commands.isCommentAlter()) + { + setInMemoryMetadata(new_metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); + } else { if (!maybe_mutation_commands.empty() && maybe_mutation_commands.containBarrierCommand()) @@ -919,7 +928,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock)) { - disable_reason = "Some parts have differ mmutatuon version"; + disable_reason = "Some parts have different mutation version"; return false; } @@ -1154,16 +1163,25 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( } TransactionID first_mutation_tid = mutations_begin_it->second.tid; - MergeTreeTransactionPtr txn = tryGetTransactionForMutation(mutations_begin_it->second, log); - assert(txn || first_mutation_tid.isPrehistoric()); + MergeTreeTransactionPtr txn; - if (txn) + if (!first_mutation_tid.isPrehistoric()) { + /// Mutate visible parts only /// NOTE Do not mutate visible parts in Outdated state, because it does not make sense: /// mutation will fail anyway due to serialization error. - if (!part->version.isVisible(*txn)) + + /// It's possible that both mutation and transaction are already finished, + /// because that part should not be mutated because it was not visible for that transaction. 
+ if (!part->version.isVisible(first_mutation_tid.start_csn, first_mutation_tid)) continue; + + txn = tryGetTransactionForMutation(mutations_begin_it->second, log); + if (!txn) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find transaction {} that has started mutation {} " + "that is going to be applied to part {}", + first_mutation_tid, mutations_begin_it->second.file_name, part->name); } auto commands = std::make_shared(); @@ -1370,8 +1388,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldWriteAheadLogs(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); - if (getSettings()->merge_tree_enable_clear_old_broken_detached) - cleared_count += clearOldBrokenPartsFromDetachedDirectory(); + cleared_count += clearOldBrokenPartsFromDetachedDirectory(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); @@ -1644,11 +1661,7 @@ struct FutureNewEmptyPart MergeTreePartition partition; std::string part_name; - scope_guard tmp_dir_guard; - StorageMergeTree::MutableDataPartPtr data_part; - - std::string getDirName() const { return StorageMergeTree::EMPTY_PART_TMP_PREFIX + part_name; } }; using FutureNewEmptyParts = std::vector; @@ -1679,19 +1692,19 @@ FutureNewEmptyParts initCoverageWithNewEmptyParts(const DataPartsVector & old_pa return future_parts; } -StorageMergeTree::MutableDataPartsVector createEmptyDataParts(MergeTreeData & data, FutureNewEmptyParts & future_parts, const MergeTreeTransactionPtr & txn) +std::pair> createEmptyDataParts( + MergeTreeData & data, FutureNewEmptyParts & future_parts, const MergeTreeTransactionPtr & txn) { - StorageMergeTree::MutableDataPartsVector data_parts; + std::pair> data_parts; for (auto & part: future_parts) - data_parts.push_back(data.createEmptyPart(part.part_info, part.partition, part.part_name, txn)); + { + auto [new_data_part, tmp_dir_holder] = data.createEmptyPart(part.part_info, part.partition, part.part_name, txn); + data_parts.first.emplace_back(std::move(new_data_part)); + data_parts.second.emplace_back(std::move(tmp_dir_holder)); + } return data_parts; } -void captureTmpDirectoryHolders(MergeTreeData & data, FutureNewEmptyParts & future_parts) -{ - for (auto & part : future_parts) - part.tmp_dir_guard = data.getTemporaryPartDirectoryHolder(part.getDirName()); -} void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction) { @@ -1729,15 +1742,24 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. 
- auto merge_blocker = stopMergesAndWait(); waitForOutdatedPartsToBeLoaded(); + auto merge_blocker = stopMergesAndWait(); Stopwatch watch; ProfileEventsScope profile_events_scope; auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + auto data_parts_lock = lockParts(); + auto parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto parts = getVisibleDataPartsVector(query_context); @@ -1749,9 +1771,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1781,8 +1801,15 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + if (auto part = outdatePart(txn.get(), part_name, /*force=*/ true)) + dropPartsImpl({part}, detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); @@ -1792,8 +1819,10 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt if (detach) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } { @@ -1803,9 +1832,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1839,8 +1866,26 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is 
possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + DataPartsVector parts_to_remove; + { + auto data_parts_lock = lockParts(); + if (partition_ast && partition_ast->all) + parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + else + { + String partition_id = getPartitionIDFromQuery(partition, query_context, &data_parts_lock); + parts_to_remove = getVisibleDataPartsVectorInPartition(query_context, partition_id, data_parts_lock); + } + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + } + dropPartsImpl(std::move(parts_to_remove), detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); DataPartsVector parts; @@ -1855,12 +1900,16 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont } if (detach) + { for (const auto & part : parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } + } auto future_parts = initCoverageWithNewEmptyParts(parts); @@ -1869,9 +1918,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1889,6 +1937,35 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont clearEmptyParts(); } +void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool detach) +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + + if (detach) + { + /// If DETACH clone parts to detached/ directory + /// NOTE: no race with background cleanup until we hold pointers to parts + for (const auto & part : parts_to_remove) + { + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); + } + } + + if (deduplication_log) + { + for (const auto & part : parts_to_remove) + deduplication_log->dropPart(part->info); + } + + if (detach) + LOG_INFO(log, "Detached {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + else + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); +} + PartitionCommandsResultInfo StorageMergeTree::attachPartition( const ASTPtr & partition, const StorageMetadataPtr & /* metadata_snapshot */, bool attach_part, ContextPtr local_context) @@ -1920,6 +1997,7 @@ PartitionCommandsResultInfo 
StorageMergeTree::attachPartition( renamed_parts.old_and_new_names[i].old_name.clear(); results.push_back(PartitionCommandResultInfo{ + .command_type = "ATTACH_PART", .partition_id = loaded_parts[i]->info.partition_id, .part_name = loaded_parts[i]->name, .old_part_name = old_name, @@ -1929,7 +2007,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( } /// New parts with other data may appear in place of deleted parts. - local_context->dropCaches(); + local_context->clearCaches(); return results; } @@ -1965,7 +2043,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, clone_params); + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, clone_params, local_context->getWriteSettings()); dst_parts.emplace_back(std::move(dst_part)); dst_parts_locks.emplace_back(std::move(part_lock)); } @@ -2064,7 +2142,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params); + auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params, local_context->getWriteSettings()); dst_parts.emplace_back(std::move(dst_part)); dst_parts_locks.emplace_back(std::move(part_lock)); } @@ -2151,7 +2229,6 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ auto & part_mutable = const_cast(*part); part_mutable.writeChecksums(part->checksums, local_context->getWriteSettings()); - part->checkMetadata(); results.emplace_back(part->name, true, "Checksums recounted and written to disk."); } catch (const Exception & ex) @@ -2165,7 +2242,6 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ try { checkDataPart(part, true); - part->checkMetadata(); results.emplace_back(part->name, true, ""); } catch (const Exception & ex) @@ -2181,6 +2257,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) { const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -2193,7 +2270,7 @@ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collec for (const auto & data_part : data_parts) min_data_version = std::min(min_data_version, data_part->info.getDataVersion() + 1); - auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, read_settings, local_context); for (auto & part_backup_entries : 
parts_backup_entries) backup_entries_collector.addBackupEntries(std::move(part_backup_entries.backup_entries)); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 936ba1b7f18..89da9ab839e 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -102,7 +102,7 @@ public: void alter(const AlterCommands & commands, ContextPtr context, AlterLockHolder & table_lock_holder) override; - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; ActionLock getActionLock(StorageActionBlockType action_type) override; @@ -237,6 +237,7 @@ private: void dropPartNoWaitNoThrow(const String & part_name) override; void dropPart(const String & part_name, bool detach, ContextPtr context) override; void dropPartition(const ASTPtr & partition, bool detach, ContextPtr context) override; + void dropPartsImpl(DataPartsVector && parts_to_remove, bool detach); PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, ContextPtr context) override; void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr context) override; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3e928c3a811..76a439eabaf 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -34,16 +35,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } -static String backQuoteMySQL(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeBackQuotedStringMySQL(x, wb); - } - return res; -} - StorageMySQL::StorageMySQL( const StorageID & table_id_, mysqlxx::PoolWithFailover && pool_, @@ -113,6 +104,7 @@ Pipe StorageMySQL::read( column_names_, storage_snapshot->metadata->getColumns().getOrdinary(), IdentifierQuotingStyle::BackticksMySQL, + LiteralEscapingStyle::Regular, remote_database_name, remote_table_name, context_); diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 0ced128c8ef..5e4fde99306 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 3551ee36819..7961c44e844 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -122,7 +122,7 @@ Pipe StoragePostgreSQL::read( query_info_, column_names_, storage_snapshot->metadata->getColumns().getOrdinary(), - IdentifierQuotingStyle::DoubleQuotes, remote_table_schema, remote_table_name, context_); + IdentifierQuotingStyle::DoubleQuotes, LiteralEscapingStyle::PostgreSQL, remote_table_schema, remote_table_name, context_); LOG_TRACE(log, "Query: {}", query); Block sample_block; diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 14b7fc15af2..a4304faeaec 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -139,7 +139,7 @@ public: void startup() override { getNested()->startup(); } void shutdown() override { getNested()->shutdown(); } - void flush() override { getNested()->flush(); } + void flushAndPrepareForShutdown() override { getNested()->flushAndPrepareForShutdown(); } ActionLock getActionLock(StorageActionBlockType action_type) override { return getNested()->getActionLock(action_type); } 
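The new `LiteralEscapingStyle` parameter passed in `StorageMySQL::read` (`Regular`) and `StoragePostgreSQL::read` (`PostgreSQL`) above decides how string literals are rendered when a filter is pushed down to the external database: MySQL-style strings use backslash escapes, while standard PostgreSQL strings double the quote and treat backslashes literally. A rough illustration with a made-up helper, not the actual ClickHouse implementation:

#include <string>

enum class LiteralEscapingStyle { Regular, PostgreSQL };

/// Illustrative only: render `value` as a quoted SQL string literal for the target dialect.
std::string escapeStringLiteral(const std::string & value, LiteralEscapingStyle style)
{
    std::string result = "'";
    for (char c : value)
    {
        if (c == '\'')
            result += (style == LiteralEscapingStyle::PostgreSQL) ? "''" : "\\'";
        else if (c == '\\' && style == LiteralEscapingStyle::Regular)
            result += "\\\\";   /// backslashes must be doubled for MySQL-style strings
        else
            result += c;
    }
    result += '\'';
    return result;
}

/// escapeStringLiteral("it's", LiteralEscapingStyle::Regular)    -> 'it\'s'
/// escapeStringLiteral("it's", LiteralEscapingStyle::PostgreSQL) -> 'it''s'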
@@ -149,8 +149,8 @@ public: return getNested()->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } - CheckResults checkData(const ASTPtr & query , ContextPtr context) override { return getNested()->checkData(query, context); } - void checkTableCanBeDropped() const override { getNested()->checkTableCanBeDropped(); } + CheckResults checkData(const ASTPtr & query, ContextPtr context) override { return getNested()->checkData(query, context); } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { getNested()->checkTableCanBeDropped(query_context); } bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 13c0fb3f7c2..4eda4176cba 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -86,7 +87,6 @@ #include #include #include -#include #include #include #include @@ -185,6 +185,7 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; extern const int NOT_INITIALIZED; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; + extern const int TABLE_IS_DROPPED; } namespace ActionLocks @@ -195,6 +196,7 @@ namespace ActionLocks extern const StorageActionBlockType ReplicationQueue; extern const StorageActionBlockType PartsTTLMerge; extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } @@ -335,6 +337,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( mutations_updating_task->deactivate(); + mutations_watch_callback = std::make_shared(mutations_updating_task->getWatchCallback()); + merge_selecting_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); @@ -710,7 +714,7 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() { auto res = future.get(); if (res.error != Coordination::Error::ZOK && res.error != Coordination::Error::ZNODEEXISTS) - throw Coordination::Exception(fmt::format("Failed to create new nodes at {}", zookeeper_path), res.error); + throw Coordination::Exception(res.error, "Failed to create new nodes at {}", zookeeper_path); } } @@ -746,7 +750,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr } else if (code != Coordination::Error::ZOK) { - throw Coordination::Exception(code, drop_lock_path); + throw Coordination::Exception::fromPath(code, drop_lock_path); } else { @@ -977,6 +981,16 @@ void StorageReplicatedMergeTree::drop() { /// Session could expire, get it again zookeeper = getZooKeeperIfTableShutDown(); + + auto lost_part_count_path = fs::path(zookeeper_path) / "lost_part_count"; + Coordination::Stat lost_part_count_stat; + String lost_part_count_str; + if (zookeeper->tryGet(lost_part_count_path, lost_part_count_str, &lost_part_count_stat)) + { + UInt64 lost_part_count = lost_part_count_str.empty() ? 
0 : parse(lost_part_count_str); + if (lost_part_count > 0) + LOG_INFO(log, "Dropping table with non-zero lost_part_count equal to {}", lost_part_count); + } dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper); } } @@ -1273,10 +1287,6 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart( if (!paranoid_check_for_covered_parts) return; - /// FIXME https://github.com/ClickHouse/ClickHouse/issues/51182 - if (getSettings()->use_metadata_cache) - return; - ActiveDataPartSet active_set(format_version); for (const auto & part_name : parts_in_zk) active_set.add(part_name); @@ -1588,6 +1598,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd while (true) { LOG_DEBUG(log, "Committing part {} to zookeeper", part->name); + Coordination::Requests ops; NameSet absent_part_paths_on_replicas; @@ -2018,7 +2029,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared } } -static void paranoidCheckForCoveredPartsInZooKeeper(const StorageReplicatedMergeTree * storage, const ZooKeeperPtr & zookeeper, const String & replica_path, +static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeeper, const String & replica_path, MergeTreeDataFormatVersion format_version, const String & covering_part_name) { #ifdef ABORT_ON_LOGICAL_ERROR @@ -2032,10 +2043,6 @@ static void paranoidCheckForCoveredPartsInZooKeeper(const StorageReplicatedMerge if (!paranoid_check_for_covered_parts) return; - /// FIXME https://github.com/ClickHouse/ClickHouse/issues/51182 - if (storage->getSettings()->use_metadata_cache) - return; - auto drop_range_info = MergeTreePartInfo::fromPartName(covering_part_name, format_version); Strings parts_remain = zookeeper->getChildren(replica_path + "/parts"); for (const auto & part_name : parts_remain) @@ -2094,15 +2101,17 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { if (auto part_to_detach = part.getPartIfItWasActive()) { - LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); - part_to_detach->makeCloneInDetached("", metadata_snapshot); + String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } } /// Forcibly remove parts from ZooKeeper removePartsFromZooKeeperWithRetries(parts_to_remove); - paranoidCheckForCoveredPartsInZooKeeper(this, getZooKeeper(), replica_path, format_version, entry.new_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry.new_part_name); if (entry.detach) LOG_DEBUG(log, "Detached {} parts inside {}.", parts_to_remove.size(), entry.new_part_name); @@ -2239,7 +2248,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) LOG_INFO(log, "All parts from REPLACE PARTITION command have been already attached"); removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(this, getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); return true; } @@ -2458,7 +2467,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) 
.metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( - part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, clone_params); + part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, clone_params, getContext()->getWriteSettings()); part_desc->res_part = std::move(res_part); part_desc->temporary_part_lock = std::move(temporary_part_lock); } @@ -2513,7 +2522,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) renameTempPartAndReplace(part_desc->res_part, transaction); getCommitPartOps(ops, part_desc->res_part); - lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files); + lockSharedData(*part_desc->res_part, /* replace_existing_lock */ true, part_desc->hardlinked_files); } @@ -2552,7 +2561,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(this, getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); res_parts.clear(); parts_to_remove.clear(); cleanup_thread.wakeup(); @@ -2825,7 +2834,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : parts_to_remove_from_working_set) { LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("clone", metadata_snapshot); + part->makeCloneInDetached("clone", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -3212,7 +3221,7 @@ void StorageReplicatedMergeTree::mutationsUpdatingTask() { try { - queue.updateMutations(getZooKeeper(), mutations_updating_task->getWatchCallback()); + queue.updateMutations(getZooKeeper(), mutations_watch_callback); } catch (const Coordination::Exception & e) { @@ -3309,7 +3318,7 @@ bool StorageReplicatedMergeTree::scheduleDataProcessingJob(BackgroundJobsAssigne auto job_type = selected_entry->log_entry->type; /// Depending on entry type execute in fetches (small) pool or big merge_mutate pool - if (job_type == LogEntry::GET_PART) + if (job_type == LogEntry::GET_PART || job_type == LogEntry::ATTACH_PART) { assignee.scheduleFetchTask(std::make_shared( [this, selected_entry] () mutable @@ -3791,12 +3800,12 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n chassert(!broken_part); chassert(!storage_init); part->was_removed_as_broken = true; - part->makeCloneInDetached("broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); broken_part = part; } else { - part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); } detached_parts.push_back(part->name); } @@ -3921,7 +3930,10 @@ void StorageReplicatedMergeTree::startBeingLeader() void StorageReplicatedMergeTree::stopBeingLeader() { if (!is_leader) + { + LOG_TRACE(log, "stopBeingLeader called but we are not a leader already"); return; + } LOG_INFO(log, "Stopped being leader"); is_leader = false; @@ -3978,6 +3990,153 @@ String StorageReplicatedMergeTree::findReplicaHavingPart(const String & part_nam return {}; } +void StorageReplicatedMergeTree::addLastSentPart(const 
MergeTreePartInfo & info) +{ + { + std::lock_guard lock(last_sent_parts_mutex); + last_sent_parts.emplace_back(info); + static constexpr size_t LAST_SENT_PARS_WINDOW_SIZE = 1000; + while (last_sent_parts.size() > LAST_SENT_PARS_WINDOW_SIZE) + last_sent_parts.pop_front(); + } + + last_sent_parts_cv.notify_all(); +} + +void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_) +{ + /// Will be true in case in case of query + if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr) + { + LOG_TRACE(log, "Will not wait for unique parts to be fetched by other replicas because shutdown called from DROP/DETACH query"); + return; + } + + if (!shutdown_called.load()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug"); + + auto settings_ptr = getSettings(); + + auto wait_ms = settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds(); + if (wait_ms == 0) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero"); + return; + } + + if (shutdown_deadline_ <= std::chrono::system_clock::now()) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because shutdown_deadline already passed"); + return; + } + + auto zookeeper = getZooKeeperIfTableShutDown(); + + auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log); + if (unique_parts_set.empty()) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts"); + return; + } + else + { + LOG_INFO(log, "Will wait for {} unique parts to be fetched", unique_parts_set.size()); + } + + auto wait_predicate = [&] () -> bool + { + for (auto it = unique_parts_set.begin(); it != unique_parts_set.end();) + { + const auto & part = *it; + + bool found = false; + for (const auto & sent_part : last_sent_parts | std::views::reverse) + { + if (sent_part.contains(part)) + { + LOG_TRACE(log, "Part {} was fetched by some replica", part.getPartNameForLogs()); + found = true; + it = unique_parts_set.erase(it); + break; + } + } + if (!found) + break; + } + return unique_parts_set.empty(); + }; + + std::unique_lock lock(last_sent_parts_mutex); + if (!last_sent_parts_cv.wait_until(lock, shutdown_deadline_, wait_predicate)) + LOG_INFO(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size()); + else + LOG_INFO(log, "Successfully waited all the parts"); +} + +std::set StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_) +{ + if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) + { + LOG_INFO(log_, "Our replica is not active, nobody will try to fetch anything"); + return {}; + } + + Strings replicas = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas"); + Strings our_parts; + std::vector data_parts_on_replicas; + for (const String & replica : replicas) + { + if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica / "is_active")) + { + LOG_TRACE(log_, "Replica {} is not active, skipping", replica); + continue; + } + + Strings parts = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas" / 
replica / "parts"); + if (replica == replica_name_) + { + LOG_TRACE(log_, "Our replica parts collected {}", replica); + our_parts = parts; + } + else + { + LOG_TRACE(log_, "Fetching parts for replica {}: [{}]", replica, fmt::join(parts, ", ")); + data_parts_on_replicas.emplace_back(format_version_, parts); + } + } + + if (data_parts_on_replicas.empty()) + { + LOG_TRACE(log_, "Has no active replicas, will no try to wait for fetch"); + return {}; + } + + std::set our_unique_parts; + for (const auto & part : our_parts) + { + bool found = false; + for (const auto & active_parts_set : data_parts_on_replicas) + { + if (!active_parts_set.getContainingPart(part).empty()) + { + found = true; + break; + } + } + + if (!found) + { + LOG_TRACE(log_, "Part not {} found on other replicas", part); + our_unique_parts.emplace(MergeTreePartInfo::fromPartName(part, format_version_)); + } + } + + if (!our_parts.empty() && our_unique_parts.empty()) + LOG_TRACE(log_, "All parts found on replicas"); + + return our_unique_parts; +} + String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(LogEntry & entry, bool active) { auto zookeeper = getZooKeeper(); @@ -4154,7 +4313,7 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_ continue; } else - throw Coordination::Exception(code, quorum_status_path); + throw Coordination::Exception::fromPath(code, quorum_status_path); } else { @@ -4178,7 +4337,7 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_ continue; } else - throw Coordination::Exception(code, quorum_status_path); + throw Coordination::Exception::fromPath(code, quorum_status_path); } } } @@ -4188,7 +4347,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) { auto zookeeper = getZooKeeper(); - LOG_DEBUG(log, "Cleaning up last parent node for partition {}", partition_id); + LOG_DEBUG(log, "Cleaning up last part node for partition {}", partition_id); /// The name of the previous part for which the quorum was reached. const String quorum_last_part_path = fs::path(zookeeper_path) / "quorum" / "last_part"; @@ -4209,6 +4368,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) if (!parts_with_quorum.added_parts.contains(partition_id)) { /// There is no information about interested part. + LOG_TEST(log, "There is no information about the partition"); break; } @@ -4226,6 +4386,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) else if (code == Coordination::Error::ZNONODE) { /// Node is deleted. It is impossible, but it is Ok. 
+ LOG_WARNING(log, "The last part node {} was deleted", quorum_last_part_path); break; } else if (code == Coordination::Error::ZBADVERSION) @@ -4234,7 +4395,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) continue; } else - throw Coordination::Exception(code, quorum_last_part_path); + throw Coordination::Exception::fromPath(code, quorum_last_part_path); } } @@ -4401,7 +4562,7 @@ bool StorageReplicatedMergeTree::fetchPart( { chassert(!is_zero_copy_part(part_to_clone)); IDataPartStorage::ClonePartParams clone_params{ .keep_metadata_version = true }; - auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, clone_params); + auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, clone_params, getContext()->getWriteSettings()); part_directory_lock = std::move(lock); return cloned_part; }; @@ -4637,6 +4798,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( void StorageReplicatedMergeTree::startup() { + LOG_TRACE(log, "Starting up table"); startOutdatedDataPartsLoadingTask(); if (attach_thread) { @@ -4658,6 +4820,8 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) since_metadata_err_incr_readonly_metric = true; CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); } + + LOG_TRACE(log, "No connection to ZooKeeper or no metadata in ZooKeeper, will not startup"); return; } @@ -4692,6 +4856,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) if (from_attach_thread) { + LOG_TRACE(log, "Trying to startup table from right now"); /// Try activating replica in current thread. restarting_thread.run(); } @@ -4701,9 +4866,18 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) /// NOTE It does not mean that replication is actually started after receiving this event. /// It only means that an attempt to startup replication was made. /// Table may be still in readonly mode if this attempt failed for any reason. - startup_event.wait(); + while (!startup_event.tryWait(10 * 1000)) + LOG_TRACE(log, "Waiting for RestartingThread to startup table"); } + auto lock = std::unique_lock(flush_and_shutdown_mutex, std::defer_lock); + do + { + if (shutdown_prepared_called.load() || shutdown_called.load()) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); + } + while (!lock.try_lock()); + /// And this is just a callback session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() { @@ -4727,6 +4901,16 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) if (from_attach_thread) { restarting_thread.shutdown(/* part_of_full_shutdown */false); + + auto data_parts_exchange_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, InterserverIOEndpointPtr{}); + if (data_parts_exchange_ptr) + { + getContext()->getInterserverIOHandler().removeEndpointIfExists(data_parts_exchange_ptr->getId(getEndpointName())); + /// Ask all parts exchange handlers to finish asap. 
New ones will fail to start + data_parts_exchange_ptr->blocker.cancelForever(); + /// Wait for all of them + std::lock_guard lock(data_parts_exchange_ptr->rwlock); + } } else { @@ -4744,6 +4928,37 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) } +void StorageReplicatedMergeTree::flushAndPrepareForShutdown() +{ + std::lock_guard lock{flush_and_shutdown_mutex}; + if (shutdown_prepared_called.exchange(true)) + return; + + try + { + auto settings_ptr = getSettings(); + /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. + fetcher.blocker.cancelForever(); + merger_mutator.merges_blocker.cancelForever(); + parts_mover.moves_blocker.cancelForever(); + stopBeingLeader(); + + if (attach_thread) + attach_thread->shutdown(); + + restarting_thread.shutdown(/* part_of_full_shutdown */true); + /// Explicitly set the event, because the restarting thread will not set it again + startup_event.set(); + shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); + } + catch (...) + { + /// Don't wait anything in case of improper prepare for shutdown + shutdown_deadline.emplace(std::chrono::system_clock::now()); + throw; + } +} + void StorageReplicatedMergeTree::partialShutdown() { ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); @@ -4779,21 +4994,28 @@ void StorageReplicatedMergeTree::shutdown() if (shutdown_called.exchange(true)) return; + flushAndPrepareForShutdown(); + + if (!shutdown_deadline.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown"); + + try + { + waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline); + } + catch (const Exception & ex) + { + if (ex.code() == ErrorCodes::LOGICAL_ERROR) + throw; + + tryLogCurrentException(log, __PRETTY_FUNCTION__); + } + session_expired_callback_handler.reset(); stopOutdatedDataPartsLoadingTask(); - /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. - fetcher.blocker.cancelForever(); - merger_mutator.merges_blocker.cancelForever(); - parts_mover.moves_blocker.cancelForever(); - mutations_finalizing_task->deactivate(); - stopBeingLeader(); + partialShutdown(); - if (attach_thread) - attach_thread->shutdown(); - - restarting_thread.shutdown(/* part_of_full_shutdown */true); - background_operations_assignee.finish(); part_moves_between_shards_orchestrator.shutdown(); { @@ -4902,67 +5124,107 @@ void StorageReplicatedMergeTree::read( snapshot_data.alter_conversions = {}; }); - /** The `select_sequential_consistency` setting has two meanings: - * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. - * 2. Do not read parts that have not yet been written to the quorum of the replicas. - * For this you have to synchronously go to ZooKeeper. - */ - if (local_context->getSettingsRef().select_sequential_consistency) - { - auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, local_context, - max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/false)) - query_plan = std::move(*plan); - return; - } + const auto & settings = local_context->getSettingsRef(); + + /// The `select_sequential_consistency` setting has two meanings: + /// 1. 
To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. + /// 2. Do not read parts that have not yet been written to the quorum of the replicas. + /// For this you have to synchronously go to ZooKeeper. + if (settings.select_sequential_consistency) + return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); if (local_context->canUseParallelReplicasOnInitiator()) + return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); +} + +void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) +{ + auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); + auto plan = reader.read(column_names, storage_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, std::move(max_added_blocks), + /* enable_parallel_reading= */false); + if (plan) + query_plan = std::move(*plan); +} + +void StorageReplicatedMergeTree::readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & /*column_names*/, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t /*max_block_size*/, + const size_t /*num_streams*/) +{ + auto table_id = getStorageID(); + + auto scalars = local_context->hasQueryContext() ? 
local_context->getQueryContext()->getScalars() : Scalars{}; + String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas; + auto parallel_replicas_cluster = local_context->getCluster(cluster_for_parallel_replicas); + + ASTPtr modified_query_ast; + Block header; + if (local_context->getSettingsRef().allow_experimental_analyzer) { - auto table_id = getStorageID(); + auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - ASTPtr modified_query_ast; - - Block header; - - if (local_context->getSettingsRef().allow_experimental_analyzer) - { - auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToSelectQuery(modified_query_tree); - } - else - { - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - - auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); - - ClusterProxy::SelectStreamFactory select_stream_factory = - ClusterProxy::SelectStreamFactory( - header, - {}, - storage_snapshot, - processed_stage); - - ClusterProxy::executeQueryWithParallelReplicas( - query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, - select_stream_factory, modified_query_ast, - local_context, query_info, cluster); + header = InterpreterSelectQueryAnalyzer::getSampleBlock( + modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_ast = queryNodeToSelectQuery(modified_query_tree); } else { - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, - local_context, max_block_size, num_streams, - processed_stage, nullptr, /*enable_parallel_reading*/local_context->canUseParallelReplicasOnFollower())) - query_plan = std::move(*plan); + modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + header + = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } + + ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( + header, + {}, + storage_snapshot, + processed_stage); + + ClusterProxy::executeQueryWithParallelReplicas( + query_plan, + getStorageID(), + select_stream_factory, + modified_query_ast, + local_context, + query_info.storage_limits, + parallel_replicas_cluster); +} + +void StorageReplicatedMergeTree::readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t max_block_size, + const size_t num_streams) +{ + auto plan = reader.read( + column_names, storage_snapshot, query_info, + local_context, max_block_size, num_streams, + processed_stage, + /* max_block_numbers_to_read= */ nullptr, + /* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower()); + if (plan) + query_plan = std::move(*plan); } template @@ -5456,6 +5718,17 @@ void 
StorageReplicatedMergeTree::alter( return; } + if (commands.isCommentAlter()) + { + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + commands.apply(future_metadata, query_context); + + setInMemoryMetadata(future_metadata); + + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); + return; + } + auto ast_to_str = [](ASTPtr query) -> String { if (!query) @@ -5525,12 +5798,27 @@ void StorageReplicatedMergeTree::alter( String new_columns_str = future_metadata.columns.toString(); ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "columns", new_columns_str, -1)); - if (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)) + bool settings_are_changed = (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)); + bool comment_is_changed = (current_metadata->comment != future_metadata.comment); + + if (settings_are_changed || comment_is_changed) { - /// Just change settings StorageInMemoryMetadata metadata_copy = *current_metadata; - metadata_copy.settings_changes = future_metadata.settings_changes; - changeSettings(metadata_copy.settings_changes, table_lock_holder); + + if (settings_are_changed) + { + /// Just change settings + metadata_copy.settings_changes = future_metadata.settings_changes; + changeSettings(metadata_copy.settings_changes, table_lock_holder); + } + + /// The comment is not replicated as of today, but we can implement it later. + if (comment_is_changed) + { + metadata_copy.setComment(future_metadata.comment); + setInMemoryMetadata(metadata_copy); + } + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata_copy); } @@ -5647,7 +5935,7 @@ void StorageReplicatedMergeTree::alter( } else { - throw Coordination::Exception("Alter cannot be assigned because of Zookeeper error", rc); + throw Coordination::Exception::fromMessage(rc, "Alter cannot be assigned because of Zookeeper error"); } } @@ -5892,8 +6180,9 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. 
- ReplicatedMergeTreeSink output(*this, metadata_snapshot, 0, 0, 0, false, false, false, query_context, - /*is_attach*/true); + ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, + /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate, + /* majority_quorum */ false, query_context, /*is_attach*/true); for (size_t i = 0; i < loaded_parts.size(); ++i) { @@ -5906,6 +6195,7 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( LOG_DEBUG(log, "Attached part {} as {}", old_name, loaded_parts[i]->name); results.push_back(PartitionCommandResultInfo{ + .command_type = "ATTACH PART", .partition_id = loaded_parts[i]->info.partition_id, .part_name = loaded_parts[i]->name, .old_part_name = old_name, @@ -5915,7 +6205,7 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( } -void StorageReplicatedMergeTree::checkTableCanBeDropped() const +void StorageReplicatedMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); @@ -6132,7 +6422,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( const auto & stop_waiting = [&]() { - bool stop_waiting_itself = waiting_itself && partial_shutdown_called; + bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || shutdown_prepared_called || shutdown_called); bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds(); bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded) && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); @@ -6534,7 +6824,7 @@ void StorageReplicatedMergeTree::fetchPartition( try { - /// part name , metadata, part_path , true, 0, zookeeper + /// part name, metadata, part_path, true, 0, zookeeper if (!fetchPart(part_name, metadata_snapshot, from_zookeeper_name, part_path, true, 0, zookeeper, /* try_fetch_shared = */ false)) throw Exception(ErrorCodes::UNFINISHED, "Failed to fetch part {} from {}", part_name, from_); } @@ -6808,7 +7098,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte continue; } else - throw Coordination::Exception("Unable to create a mutation znode", rc); + throw Coordination::Exception::fromMessage(rc, "Unable to create a mutation znode"); } merge_selecting_task->schedule(); @@ -7320,6 +7610,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); + std::set replaced_parts; for (const auto & src_part : src_all_parts) { /// We also make some kind of deduplication to avoid duplicated parts in case of ATTACH PARTITION @@ -7332,13 +7623,15 @@ void StorageReplicatedMergeTree::replacePartitionFrom( "' has inconsistent granularity with table", partition_id, src_part->name); String hash_hex = src_part->checksums.getTotalChecksumHex(); + const bool is_duplicated_part = replaced_parts.contains(hash_hex); + replaced_parts.insert(hash_hex); if (replace) LOG_INFO(log, "Trying to replace {} with hash_hex {}", src_part->name, hash_hex); else LOG_INFO(log, "Trying to attach {} with hash_hex {}", src_part->name, hash_hex); - String block_id_path = replace ? 
"" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex)); + String block_id_path = (replace || is_duplicated_part) ? "" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex)); auto lock = allocateBlockNumber(partition_id, zookeeper, block_id_path); if (!lock) @@ -7357,7 +7650,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, clone_params); + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, clone_params, query_context->getWriteSettings()); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); dst_parts_locks.emplace_back(std::move(part_lock)); @@ -7597,7 +7890,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(), .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion() }; - auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params); + auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params, query_context->getWriteSettings()); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); @@ -7931,6 +8224,9 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti if (action_type == ActionLocks::PartsMove) return parts_mover.moves_blocker.cancel(); + if (action_type == ActionLocks::PullReplicationLog) + return queue.pull_log_blocker.cancel(); + return {}; } @@ -8428,7 +8724,7 @@ void StorageReplicatedMergeTree::createTableSharedID() const } else if (code != Coordination::Error::ZOK) { - throw zkutil::KeeperException(code, zookeeper_table_id_path); + throw zkutil::KeeperException::fromPath(code, zookeeper_table_id_path); } } @@ -8536,6 +8832,14 @@ void StorageReplicatedMergeTree::getLockSharedDataOps( { String zookeeper_node = fs::path(zc_zookeeper_path) / id / replica_name; + if (!path_to_set_hardlinked_files.empty() && !hardlinks.empty()) + { + LOG_DEBUG(log, "Locking shared node {} with hardlinks from the other shared node {}, " + "hardlinks: [{}]", + zookeeper_node, path_to_set_hardlinked_files, + boost::algorithm::join(hardlinks, ",")); + } + getZeroCopyLockNodeCreateOps( zookeeper, zookeeper_node, requests, zkutil::CreateMode::Persistent, replace_existing_lock, path_to_set_hardlinked_files, hardlinks); @@ -8871,7 +9175,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(ec, zookeeper_part_replica_node); + throw zkutil::KeeperException::fromPath(ec, zookeeper_part_replica_node); } } @@ -8906,7 +9210,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(error_code, zookeeper_part_uniq_node); + throw zkutil::KeeperException::fromPath(error_code, zookeeper_part_uniq_node); } @@ -8938,7 +9242,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(error_code, zookeeper_part_uniq_node); + throw 
zkutil::KeeperException::fromPath(error_code, zookeeper_part_uniq_node); } } else @@ -9265,7 +9569,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } } - MergeTreeData::MutableDataPartPtr new_data_part = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); + auto [new_data_part, tmp_dir_holder] = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); new_data_part->setName(lost_part_name); try @@ -9435,6 +9739,15 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( /// In rare case other replica can remove path between createAncestors and createIfNotExists /// So we make up to 5 attempts + auto is_ephemeral = [&](const String & node_path) -> bool + { + String dummy_res; + Coordination::Stat node_stat; + if (zookeeper->tryGet(node_path, dummy_res, &node_stat)) + return node_stat.ephemeralOwner; + return false; + }; + bool created = false; for (int attempts = 5; attempts > 0; --attempts) { @@ -9454,6 +9767,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( if (error == Coordination::Error::ZNODEEXISTS) { + if (is_ephemeral(zookeeper_node)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Node {} already exists, but it is ephemeral", zookeeper_node); + size_t failed_op = zkutil::getFailedOpIndex(error, responses); /// Part was locked before, unfortunately it's possible during moves if (ops[failed_op]->getPath() == zookeeper_node) @@ -9582,6 +9898,7 @@ void StorageReplicatedMergeTree::backupData( /// because we need to coordinate them with other replicas (other replicas can have better parts). const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -9590,7 +9907,7 @@ void StorageReplicatedMergeTree::backupData( else data_parts = getVisibleDataPartsVector(local_context); - auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, read_settings, local_context); auto coordination = backup_entries_collector.getBackupCoordination(); String shared_id = getTableSharedID(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 1a1b3c3b10c..1f37416f881 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -112,8 +112,35 @@ public: bool need_check_structure); void startup() override; - void shutdown() override; + + /// Too many shutdown methods.... + /// + /// Partial shutdown is called if we lose the connection to ZooKeeper. + /// Table can also recover after partial shutdown and continue + /// to work. This method can be called repeatedly. void partialShutdown(); + + /// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown). + /// The shutdown process is split into two methods to make it softer and faster. In a database, shutdown() + /// looks like: + /// for (table : tables) + /// table->flushAndPrepareForShutdown() + /// + /// for (table : tables) + /// table->shutdown() + /// + /// So we first stop producing new parts for all tables (a fast operation), and afterwards shutdown() can wait
+ /// + /// In flushAndPrepareForShutdown we cancel all part-producing operations: + /// merges, fetches, moves and so on. If it wasn't called before shutdown() -- shutdown() will + /// call it (defensive programming). + void flushAndPrepareForShutdown() override; + /// In shutdown we completely terminate table -- remove + /// is_active node and interserver handler. Also optionally + /// wait until other replicas will download some parts from our replica. + void shutdown() override; + ~StorageReplicatedMergeTree() override; static String getDefaultZooKeeperPath(const Poco::Util::AbstractConfiguration & config); @@ -130,7 +157,7 @@ public: const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, - ContextPtr context, + ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; @@ -174,7 +201,7 @@ public: bool supportsIndexForIn() const override { return true; } - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; ActionLock getActionLock(StorageActionBlockType action_type) override; @@ -340,6 +367,13 @@ public: /// Get a sequential consistent view of current parts. ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const; + void addLastSentPart(const MergeTreePartInfo & info); + + /// Wait required amount of milliseconds to give other replicas a chance to + /// download unique parts from our replica + using ShutdownDeadline = std::chrono::time_point; + void waitForUniquePartsToBeFetchedByOtherReplicas(ShutdownDeadline shutdown_deadline); + private: std::atomic_bool are_restoring_replica {false}; @@ -351,7 +385,7 @@ private: friend class ReplicatedMergeTreeSinkImpl; friend class ReplicatedMergeTreePartCheckThread; friend class ReplicatedMergeTreeCleanupThread; - friend class AsyncBlockIDsCache; + friend class AsyncBlockIDsCache; friend class ReplicatedMergeTreeAlterThread; friend class ReplicatedMergeTreeRestartingThread; friend class ReplicatedMergeTreeAttachThread; @@ -444,15 +478,26 @@ private: Poco::Event partial_shutdown_event {false}; /// Poco::Event::EVENT_MANUALRESET std::atomic shutdown_called {false}; - std::atomic flush_called {false}; + std::atomic shutdown_prepared_called {false}; + std::optional shutdown_deadline; + + /// We call flushAndPrepareForShutdown before acquiring DDLGuard, so we can shutdown a table that is being created right now + mutable std::mutex flush_and_shutdown_mutex; + + + mutable std::mutex last_sent_parts_mutex; + std::condition_variable last_sent_parts_cv; + std::deque last_sent_parts; /// Threads. + /// /// A task that keeps track of the updates in the logs of all replicas and loads them into the queue. bool queue_update_in_progress = false; BackgroundSchedulePool::TaskHolder queue_updating_task; BackgroundSchedulePool::TaskHolder mutations_updating_task; + Coordination::WatchCallbackPtr mutations_watch_callback; /// A task that selects parts to merge. BackgroundSchedulePool::TaskHolder merge_selecting_task; @@ -467,7 +512,7 @@ private: /// A thread that removes old parts, log entries, and blocks. ReplicatedMergeTreeCleanupThread cleanup_thread; - AsyncBlockIDsCache async_block_ids_cache; + AsyncBlockIDsCache async_block_ids_cache; /// A thread that checks the data of the parts, as well as the queue of the parts to be checked. 
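[Editor's note] The new waitForUniquePartsToBeFetchedByOtherReplicas declaration above works together with the last_sent_parts_mutex / last_sent_parts_cv / last_sent_parts members added in the same hunk. Below is a minimal sketch of the deadline-bounded wait such members support; the clock behind ShutdownDeadline (its template arguments are lost in this rendering) and the predicate are assumptions, not the exact implementation.

#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>

/// Assumed clock for the deadline; the real alias' template arguments are not visible here.
using ShutdownDeadline = std::chrono::time_point<std::chrono::system_clock>;

/// Illustrative only: block until either the deadline passes or the condition variable is
/// notified and the "all unique parts fetched" predicate holds.
void waitForFetchesSketch(std::mutex & m, std::condition_variable & cv,
                          ShutdownDeadline deadline, const std::function<bool()> & all_fetched)
{
    std::unique_lock lock(m);
    cv.wait_until(lock, deadline, all_fetched);
}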
ReplicatedMergeTreePartCheckThread part_check_thread; @@ -513,6 +558,36 @@ private: static std::optional distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context); + void readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; @@ -699,6 +774,7 @@ private: */ String findReplicaHavingCoveringPart(LogEntry & entry, bool active); String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name); + static std::set findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_); /** Download the specified part from the specified replica. * If `to_detached`, the part is placed in the `detached` directory. diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 728d61298ec..341d8b3f768 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -25,11 +25,9 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -42,8 +40,10 @@ #include #include +#include #include #include +#include #include @@ -77,6 +77,7 @@ namespace ProfileEvents { extern const Event S3DeleteObjects; extern const Event S3ListObjects; + extern const Event EngineFileLikeReadFiles; } namespace DB @@ -121,23 +122,6 @@ namespace ErrorCodes class IOutputFormat; using OutputFormatPtr = std::shared_ptr; -static void addPathToVirtualColumns(Block & block, const String & path, size_t idx) -{ - if (block.has("_path")) - block.getByName("_path").column->assumeMutableRef().insert(path); - - if (block.has("_file")) - { - auto pos = path.find_last_of('/'); - assert(pos != std::string::npos); - - auto file = path.substr(pos + 1); - block.getByName("_file").column->assumeMutableRef().insert(file); - } - - block.getByName("_idx").column->assumeMutableRef().insert(idx); -} - class StorageS3Source::DisclosedGlobIterator::Impl : WithContext { public: @@ -145,19 +129,21 @@ public: const S3::Client & client_, const S3::URI & globbed_uri_, ASTPtr & query_, - const Block & virtual_header_, + const NamesAndTypesList & virtual_columns_, ContextPtr context_, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) + const S3Settings::RequestSettings & request_settings_, + std::function file_progress_callback_) : WithContext(context_) , client(client_.clone()) , globbed_uri(globbed_uri_) , query(query_) - , virtual_header(virtual_header_) + , virtual_columns(virtual_columns_) , read_keys(read_keys_) , request_settings(request_settings_) , 
list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) + , file_progress_callback(file_progress_callback_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); @@ -194,11 +180,6 @@ public: return nextAssumeLocked(); } - size_t getTotalSize() const - { - return total_size.load(std::memory_order_relaxed); - } - ~Impl() { list_objects_pool.wait(); @@ -249,7 +230,7 @@ private: if (!outcome.IsSuccess()) { - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } @@ -296,31 +277,26 @@ private: if (!is_initialized) { - createFilterAST(temp_buffer.front().key); + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(globbed_uri.bucket) / temp_buffer.front().key, getContext()); is_initialized = true; } if (filter_ast) { - auto block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < temp_buffer.size(); ++i) - addPathToVirtualColumns(block, fs::path(globbed_uri.bucket) / temp_buffer[i].key, i); + std::vector paths; + paths.reserve(temp_buffer.size()); + for (const auto & key_with_info : temp_buffer) + paths.push_back(fs::path(globbed_uri.bucket) / key_with_info.key); - VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const auto & idxs = typeid_cast(*block.getByName("_idx").column); - - buffer.reserve(block.rows()); - for (UInt64 idx : idxs.getData()) - { - total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed); - buffer.emplace_back(std::move(temp_buffer[idx])); - } + VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, query, virtual_columns, getContext(), filter_ast); } - else + + buffer = std::move(temp_buffer); + + if (file_progress_callback) { - buffer = std::move(temp_buffer); for (const auto & [_, info] : buffer) - total_size.fetch_add(info->size, std::memory_order_relaxed); + file_progress_callback(FileProgress(0, info->size)); } /// Set iterator only after the whole batch is processed @@ -330,20 +306,6 @@ private: read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); } - void createFilterAST(const String & any_key) - { - if (!query || !virtual_header) - return; - - /// Create a virtual block with one row to construct filter - /// Append "idx" column as the filter result - virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); - - auto block = virtual_header.cloneEmpty(); - addPathToVirtualColumns(block, fs::path(globbed_uri.bucket) / any_key, 0); - VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); - } - std::future listObjectsAsync() { return list_objects_scheduler([this] @@ -367,7 +329,7 @@ private: std::unique_ptr client; S3::URI globbed_uri; ASTPtr query; - Block virtual_header; + NamesAndTypesList virtual_columns; bool is_initialized{false}; ASTPtr filter_ast; std::unique_ptr matcher; @@ -381,18 +343,19 @@ private: ThreadPool list_objects_pool; ThreadPoolCallbackRunner list_objects_scheduler; 
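[Editor's note] With getTotalSize() removed, the glob iterator above now announces each object's size through file_progress_callback (as FileProgress(0, info->size)) instead of accumulating a total itself. A minimal sketch of a receiving side is below; treating the announced value as the file's total byte size is an assumption drawn from how the callback is invoked here, and the accumulator is illustrative rather than the actual progress machinery.

#include <atomic>
#include <cstddef>

/// Hypothetical receiver for the per-file size announcements made above: the progress-bar
/// denominator grows as keys are listed, before any data is read.
struct TotalBytesAccumulator
{
    std::atomic<size_t> total_bytes_to_read{0};

    void add(size_t announced_total_bytes)      /// e.g. info->size from FileProgress(0, info->size)
    {
        total_bytes_to_read += announced_total_bytes;
    }
};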
std::future outcome_future; - std::atomic total_size = 0; + std::function file_progress_callback; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const S3::Client & client_, const S3::URI & globbed_uri_, ASTPtr query, - const Block & virtual_header, + const NamesAndTypesList & virtual_columns_, ContextPtr context, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_)) + const S3Settings::RequestSettings & request_settings_, + std::function file_progress_callback_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) { } @@ -401,11 +364,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next() return pimpl->next(); } -size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const -{ - return pimpl->getTotalSize(); -} - class StorageS3Source::KeysIterator::Impl : WithContext { public: @@ -416,64 +374,39 @@ public: const String & bucket_, const S3Settings::RequestSettings & request_settings_, ASTPtr query_, - const Block & virtual_header_, + const NamesAndTypesList & virtual_columns_, ContextPtr context_, - bool need_total_size, - KeysWithInfo * read_keys_) + KeysWithInfo * read_keys_, + std::function file_progress_callback_) : WithContext(context_) + , keys(keys_) + , client(client_.clone()) + , version_id(version_id_) , bucket(bucket_) + , request_settings(request_settings_) , query(query_) - , virtual_header(virtual_header_) + , virtual_columns(virtual_columns_) + , file_progress_callback(file_progress_callback_) { - Strings all_keys = keys_; + ASTPtr filter_ast; + if (!keys.empty()) + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(bucket) / keys[0], getContext()); - /// Create a virtual block with one row to construct filter - if (query && virtual_header && !all_keys.empty()) + if (filter_ast) { - /// Append "idx" column as the filter result - virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); + std::vector paths; + paths.reserve(keys.size()); + for (const auto & key : keys) + paths.push_back(fs::path(bucket) / key); - auto block = virtual_header.cloneEmpty(); - addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0); - - ASTPtr filter_ast; - VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); - - if (filter_ast) - { - block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < all_keys.size(); ++i) - addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i); - - VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const auto & idxs = typeid_cast(*block.getByName("_idx").column); - - Strings filtered_keys; - filtered_keys.reserve(block.rows()); - for (UInt64 idx : idxs.getData()) - filtered_keys.emplace_back(std::move(all_keys[idx])); - - all_keys = std::move(filtered_keys); - } - } - - for (auto && key : all_keys) - { - std::optional info; - /// In case all_keys.size() > 1, avoid getting object info now - /// (it will be done anyway eventually, but with delay and in parallel). - /// But progress bar will not work in this case. 
- if (need_total_size && all_keys.size() == 1) - { - info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); - total_size += info->size; - } - - keys.emplace_back(std::move(key), std::move(info)); + VirtualColumnUtils::filterByPathOrFile(keys, paths, query, virtual_columns, getContext(), filter_ast); } if (read_keys_) - *read_keys_ = keys; + { + for (const auto & key : keys) + read_keys_->push_back({key, {}}); + } } KeyWithInfo next() @@ -481,24 +414,27 @@ public: size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) return {}; + auto key = keys[current_index]; + std::optional info; + if (file_progress_callback) + { + info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); + file_progress_callback(FileProgress(0, info->size)); + } - return keys[current_index]; - } - - size_t getTotalSize() const - { - return total_size; + return {key, info}; } private: - KeysWithInfo keys; + Strings keys; std::atomic_size_t index = 0; - + std::unique_ptr client; + String version_id; String bucket; + S3Settings::RequestSettings request_settings; ASTPtr query; - Block virtual_header; - - size_t total_size = 0; + NamesAndTypesList virtual_columns; + std::function file_progress_callback; }; StorageS3Source::KeysIterator::KeysIterator( @@ -508,13 +444,13 @@ StorageS3Source::KeysIterator::KeysIterator( const String & bucket_, const S3Settings::RequestSettings & request_settings_, ASTPtr query, - const Block & virtual_header, + const NamesAndTypesList & virtual_columns_, ContextPtr context, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function file_progress_callback_) : pimpl(std::make_shared( client_, version_id_, keys_, bucket_, request_settings_, - query, virtual_header, context, need_total_size, read_keys)) + query, virtual_columns_, context, read_keys, file_progress_callback_)) { } @@ -523,98 +459,127 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() return pimpl->next(); } -size_t StorageS3Source::KeysIterator::getTotalSize() const -{ - return pimpl->getTotalSize(); -} - -Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageS3Source::StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, const std::shared_ptr & client_, const String & bucket_, const String & version_id_, + const String & url_host_and_port_, std::shared_ptr file_iterator_, - const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_)) + const size_t max_parsing_threads_, + bool need_only_count_, + std::optional query_info_) + : ISource(info.source_header, false) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) , version_id(version_id_) + , url_host_and_port(url_host_and_port_) , format(format_) - , columns_desc(columns_) + , columns_desc(info.columns_description) + , requested_columns(info.requested_columns) , max_block_size(max_block_size_) , 
request_settings(request_settings_) , compression_hint(std::move(compression_hint_)) , client(client_) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , requested_virtual_columns(requested_virtual_columns_) + , query_info(std::move(query_info_)) + , requested_virtual_columns(info.requested_virtual_columns) , file_iterator(file_iterator_) - , download_thread_num(download_thread_num_) + , max_parsing_threads(max_parsing_threads_) + , need_only_count(need_only_count_) , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) { reader = createReader(); if (reader) - { - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); reader_future = createReaderAsync(); - } } StorageS3Source::ReaderHolder StorageS3Source::createReader() { KeyWithInfo key_with_info; - size_t object_size; do { key_with_info = (*file_iterator)(); if (key_with_info.key.empty()) return {}; - object_size = key_with_info.info ? key_with_info.info->size : S3::getObjectSize(*client, bucket, key_with_info.key, version_id, request_settings); + if (!key_with_info.info) + key_with_info.info = S3::getObjectInfo(*client, bucket, key_with_info.key, version_id, request_settings); } - while (getContext()->getSettingsRef().s3_skip_empty_files && object_size == 0); - - auto compression_method = chooseCompressionMethod(key_with_info.key, compression_hint); - - auto read_buf = createS3ReadBuffer(key_with_info.key, object_size); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, std::nullopt, std::nullopt, - /* is_remote_fs */ true, compression_method); + while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info.info->size == 0); QueryPipelineBuilder builder; - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) + std::shared_ptr source; + std::unique_ptr read_buf; + std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(key_with_info) : std::nullopt; + if (num_rows_from_cache) { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. + source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); + builder.init(Pipe(source)); } + else + { + auto compression_method = chooseCompressionMethod(key_with_info.key, compression_hint); + read_buf = createS3ReadBuffer(key_with_info.key, key_with_info.info->size); + + auto input_format = FormatFactory::instance().getInput( + format, + *read_buf, + sample_block, + getContext(), + max_block_size, + format_settings, + need_only_count ? 
1 : max_parsing_threads, + /* max_download_threads= */ std::nullopt, + /* is_remote_fs */ true, + compression_method); + + if (query_info.has_value()) + input_format->setQueryInfo(query_info.value(), getContext()); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { return std::make_shared(header, columns_desc, *input_format, getContext()); }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; } std::future StorageS3Source::createReaderAsync() @@ -713,34 +678,19 @@ Chunk StorageS3Source::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); - - const auto & file_path = reader.getPath(); - - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - - for (const auto & virtual_column : requested_virtual_columns) - { - if (virtual_column.name == "_path") - { - chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); - } - else if (virtual_column.name == "_file") - { - size_t last_slash_pos = file_path.find_last_of('/'); - auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - } - + total_rows_in_file += num_rows; + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath()); return chunk; } + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getFile(), total_rows_in_file); + + total_rows_in_file = 0; assert(reader_future.valid()); reader = reader_future.get(); @@ -748,13 +698,6 @@ Chunk StorageS3Source::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
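[Editor's note] The comment a bit earlier in createReader() explains why a cached row count is streamed out as many small chunks rather than one large chunk. A self-contained sketch of that splitting arithmetic (not the actual ConstChunkGenerator) follows; the cached count itself is keyed by host/bucket/key plus format and format settings and validated against the object's last modification time, per addNumRowsToCache / tryGetNumRowsFromCache later in this file.

#include <algorithm>
#include <cstddef>
#include <vector>

/// Illustrative only: emit a cached row count as chunks of at most max_block_size rows each.
std::vector<size_t> splitCachedCountIntoChunks(size_t num_rows, size_t max_block_size)
{
    std::vector<size_t> chunk_sizes;
    for (size_t remaining = num_rows; remaining > 0;)
    {
        const size_t rows = std::min(remaining, max_block_size);
        chunk_sizes.push_back(rows);
        remaining -= rows;
    }
    return chunk_sizes;   /// e.g. 150000 rows, max_block_size = 65536 -> {65536, 65536, 18928}
}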
create_reader_pool.wait(); @@ -764,6 +707,25 @@ Chunk StorageS3Source::generate() return {}; } +void StorageS3Source::addNumRowsToCache(const String & key, size_t num_rows) +{ + String source = fs::path(url_host_and_port) / bucket / key; + auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); + StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); +} + +std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) +{ + String source = fs::path(url_host_and_port) / bucket / key_with_info.key; + auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); + auto get_last_mod_time = [&]() -> std::optional + { + return key_with_info.info->last_modification_time; + }; + + return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); +} + class StorageS3Sink : public SinkToStorage { public: @@ -989,14 +951,7 @@ StorageS3::StorageS3( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - auto default_virtuals = NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; - - auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); - virtual_columns = getVirtualsForStorage(columns, default_virtuals); - for (const auto & column : virtual_columns) - virtual_block.insert({column.type->createColumn(), column.type, column.name}); + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } std::shared_ptr StorageS3::createFileIterator( @@ -1004,9 +959,9 @@ std::shared_ptr StorageS3::createFileIterator( bool distributed_processing, ContextPtr local_context, ASTPtr query, - const Block & virtual_block, - bool need_total_size, - KeysWithInfo * read_keys) + const NamesAndTypesList & virtual_columns, + KeysWithInfo * read_keys, + std::function file_progress_callback) { if (distributed_processing) { @@ -1016,26 +971,21 @@ std::shared_ptr StorageS3::createFileIterator( { /// Iterate through disclosed globs and make a source for each file return std::make_shared( - *configuration.client, configuration.url, query, virtual_block, - local_context, read_keys, configuration.request_settings); + *configuration.client, configuration.url, query, virtual_columns, + local_context, read_keys, configuration.request_settings, file_progress_callback); } else { return std::make_shared( *configuration.client, configuration.url.version_id, configuration.keys, configuration.url.bucket, configuration.request_settings, query, - virtual_block, local_context, need_total_size, read_keys); + virtual_columns, local_context, read_keys, file_progress_callback); } } -bool StorageS3::supportsSubcolumns() const +bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - -bool StorageS3::supportsSubsetOfColumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); } bool StorageS3::prefersLargeBlocks() const @@ -1064,60 +1014,35 @@ Pipe StorageS3::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if 
(column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper = createFileIterator( - query_configuration, distributed_processing, local_context, query_info.query, virtual_block); + query_configuration, distributed_processing, local_context, query_info.query, virtual_columns, nullptr, local_context->getFileProgressCallback()); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + const size_t max_threads = local_context->getSettingsRef().max_threads; + const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - - const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, query_configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, query_configuration.request_settings, query_configuration.compression_method, query_configuration.client, query_configuration.url.bucket, query_configuration.url.version_id, + query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), iterator_wrapper, - max_download_threads)); + max_parsing_threads, + need_only_count, + query_info)); } return Pipe::unitePipes(std::move(pipes)); @@ -1226,7 +1151,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(ErrorCodes::S3_ERROR, "{}: {}", std::to_string(static_cast(err.GetErrorType())), err.GetMessage()); + throw S3Exception(err.GetMessage(), err.GetErrorType()); } for (const auto & error : response.GetResult().GetErrors()) @@ -1282,7 +1207,8 @@ void StorageS3::Configuration::connect(ContextPtr context) context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ false, request_settings.get_request_throttler, - request_settings.put_request_throttler); + request_settings.put_request_throttler, + url.uri.getScheme()); client_configuration.endpointOverride = url.endpoint; client_configuration.maxConnections = static_cast(request_settings.max_connections); @@ -1469,6 +1395,94 @@ ColumnsDescription StorageS3::getTableStructureFromData( return 
getTableStructureFromDataImpl(configuration, format_settings, ctx); } +namespace +{ + class ReadBufferIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferIterator( + std::shared_ptr file_iterator_, + const StorageS3Source::KeysWithInfo & read_keys_, + const StorageS3::Configuration & configuration_, + const std::optional & format_settings_, + const ContextPtr & context_) + : WithContext(context_) + , file_iterator(file_iterator_) + , read_keys(read_keys_) + , configuration(configuration_) + , format_settings(format_settings_) + , prev_read_keys_size(read_keys_.size()) + { + } + + std::unique_ptr next() override + { + while (true) + { + current_key_with_info = (*file_iterator)(); + + if (current_key_with_info.key.empty()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because there are no files with provided path " + "in S3 or all files are empty. You must specify table structure manually", + configuration.format); + + return nullptr; + } + + /// S3 file iterator could get new keys after new iteration, check them in schema cache. + if (getContext()->getSettingsRef().schema_inference_use_cache_for_s3 && read_keys.size() > prev_read_keys_size) + { + columns_from_cache = StorageS3::tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, getContext()); + prev_read_keys_size = read_keys.size(); + if (columns_from_cache) + return nullptr; + } + + if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info.info && current_key_with_info.info->size == 0) + continue; + + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info.key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); + if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) + { + first = false; + return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info.key, configuration.compression_method), zstd_window_log_max); + } + } + } + + std::optional getCachedColumns() override + { + return columns_from_cache; + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + return; + + String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info.key; + auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + private: + std::shared_ptr file_iterator; + const StorageS3Source::KeysWithInfo & read_keys; + const StorageS3::Configuration & configuration; + const std::optional & format_settings; + std::optional columns_from_cache; + StorageS3Source::KeyWithInfo current_key_with_info; + size_t prev_read_keys_size; + bool first = true; + }; + +} + ColumnsDescription StorageS3::getTableStructureFromDataImpl( const Configuration & configuration, const std::optional & format_settings, @@ -1476,61 +1490,22 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( { KeysWithInfo read_keys; - auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys); + auto file_iterator = 
createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys); std::optional columns_from_cache; - size_t prev_read_keys_size = read_keys.size(); if (ctx->getSettingsRef().schema_inference_use_cache_for_s3) columns_from_cache = tryGetColumnsFromCache(read_keys.begin(), read_keys.end(), configuration, format_settings, ctx); - ReadBufferIterator read_buffer_iterator = [&, first = true](ColumnsDescription & cached_columns) mutable -> std::unique_ptr - { - while (true) - { - auto key_with_info = (*file_iterator)(); - - if (key_with_info.key.empty()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in S3 or all files are empty. You must specify table structure manually", - configuration.format); - - return nullptr; - } - - /// S3 file iterator could get new keys after new iteration, check them in schema cache. - if (ctx->getSettingsRef().schema_inference_use_cache_for_s3 && read_keys.size() > prev_read_keys_size) - { - columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, ctx); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - { - cached_columns = *columns_from_cache; - return nullptr; - } - } - - if (ctx->getSettingsRef().s3_skip_empty_files && key_with_info.info && key_with_info.info->size == 0) - continue; - - int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, key_with_info.key, configuration.url.version_id, configuration.request_settings, ctx->getReadSettings()); - if (!ctx->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(key_with_info.key, configuration.compression_method), zstd_window_log_max); - } - } - }; - ColumnsDescription columns; if (columns_from_cache) + { columns = *columns_from_cache; + } else + { + ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx); columns = readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); + } if (ctx->getSettingsRef().schema_inference_use_cache_for_s3) addColumnsToCache(read_keys, configuration, columns, configuration.format, format_settings, ctx); @@ -1667,7 +1642,7 @@ std::optional StorageS3::tryGetColumnsFromCache( String path = fs::path(configuration.url.bucket) / it->key; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, ctx); - auto columns = schema_cache.tryGet(cache_key, get_last_mod_time); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); if (columns) return columns; } @@ -1689,7 +1664,7 @@ void StorageS3::addColumnsToCache( std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem.key; }); auto cache_keys = getKeysForSchemaCache(sources, format_name, format_settings, ctx); auto & schema_cache = getSchemaCache(ctx); - schema_cache.addMany(cache_keys, columns); + schema_cache.addManyColumns(cache_keys, columns); } } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 8d571dd796f..ee03b9f18c2 100644 --- 
a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -20,7 +21,9 @@ #include #include #include +#include #include +#include namespace Aws::S3 { @@ -56,7 +59,6 @@ public: public: virtual ~IIterator() = default; virtual KeyWithInfo next() = 0; - virtual size_t getTotalSize() const = 0; KeyWithInfo operator ()() { return next(); } }; @@ -68,13 +70,13 @@ public: const S3::Client & client_, const S3::URI & globbed_uri_, ASTPtr query, - const Block & virtual_header, + const NamesAndTypesList & virtual_columns, ContextPtr context, KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}); + const S3Settings::RequestSettings & request_settings_ = {}, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -92,13 +94,12 @@ public: const String & bucket_, const S3Settings::RequestSettings & request_settings_, ASTPtr query, - const Block & virtual_header, + const NamesAndTypesList & virtual_columns, ContextPtr context, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -113,30 +114,27 @@ public: KeyWithInfo next() override { return {callback(), {}}; } - size_t getTotalSize() const override { return 0; } - private: ReadTaskCallback callback; }; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format, String name_, - const Block & sample_block, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, const std::shared_ptr & client_, const String & bucket, const String & version_id, + const String & url_host_and_port, std::shared_ptr file_iterator_, - size_t download_thread_num); + size_t max_parsing_threads, + bool need_only_count_, + std::optional query_info); ~StorageS3Source() override; @@ -145,30 +143,37 @@ public: Chunk generate() override; private: + friend class StorageS3QueueSource; + String name; String bucket; String version_id; + String url_host_and_port; String format; ColumnsDescription columns_desc; + NamesAndTypesList requested_columns; UInt64 max_block_size; S3Settings::RequestSettings request_settings; String compression_hint; std::shared_ptr client; Block sample_block; std::optional format_settings; + std::optional query_info; struct ReaderHolder { public: ReaderHolder( - String path_, + KeyWithInfo key_with_info_, + String bucket_, std::unique_ptr read_buf_, - std::shared_ptr input_format_, + std::shared_ptr source_, std::unique_ptr pipeline_, std::unique_ptr reader_) - : path(std::move(path_)) + : key_with_info(std::move(key_with_info_)) + , bucket(std::move(bucket_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) + , source(std::move(source_)) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -189,34 +194,37 @@ private: /// reader uses pipeline, pipeline uses read_buf. 
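[Editor's note] The ordering note just above (continued by the move-assignments that follow) is the usual C++ dependency rule: objects that use a resource must be torn down before that resource. A tiny self-contained illustration of the same rule via member declaration order is below; it deliberately uses made-up types, not the real ReaderHolder fields.

#include <cstdio>
#include <memory>

struct Consumer
{
    int * data;
    ~Consumer() { std::printf("last value seen: %d\n", *data); }  /// touches the resource on teardown
};

/// Members are destroyed in reverse declaration order, so the consumer (declared later)
/// is destroyed first, while the resource it points into is still alive.
struct HolderSketch
{
    std::unique_ptr<int> resource = std::make_unique<int>(42);
    Consumer consumer{resource.get()};
};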
reader = std::move(other.reader); pipeline = std::move(other.pipeline); - input_format = std::move(other.input_format); + source = std::move(other.source); read_buf = std::move(other.read_buf); - path = std::move(other.path); + key_with_info = std::move(other.key_with_info); + bucket = std::move(other.bucket); return *this; } - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } - explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getPath() const { return path; } + String getPath() const { return fs::path(bucket) / key_with_info.key; } + const String & getFile() const { return key_with_info.key; } + const KeyWithInfo & getKeyWithInfo() const { return key_with_info; } + + const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } private: - String path; + KeyWithInfo key_with_info; + String bucket; std::unique_ptr read_buf; - std::shared_ptr input_format; + std::shared_ptr source; std::unique_ptr pipeline; std::unique_ptr reader; }; ReaderHolder reader; - std::vector requested_virtual_columns; + NamesAndTypesList requested_virtual_columns; std::shared_ptr file_iterator; - size_t download_thread_num = 1; + size_t max_parsing_threads = 1; + bool need_only_count; Poco::Logger * log = &Poco::Logger::get("StorageS3Source"); @@ -224,10 +232,7 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; + size_t total_rows_in_file = 0; /// Recreate ReadBuffer and Pipeline for each file. 
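[Editor's note] max_parsing_threads, which replaces download_thread_num in the members above, is filled in by StorageS3::read() earlier in this diff as a per-stream share of max_threads. A small sketch of that formula with illustrative numbers:

#include <cstddef>

/// Mirrors the expression used in StorageS3::read() in this diff: each of num_streams
/// sources gets an equal share of max_threads for parsing, and at least one thread
/// once there are as many streams as threads.
size_t parsingThreadsPerSource(size_t max_threads, size_t num_streams)
{
    return num_streams >= max_threads ? 1 : (max_threads / num_streams);
}

/// e.g. max_threads = 16, num_streams = 4  -> 4 parsing threads per source
///      max_threads = 16, num_streams = 32 -> 1 parsing thread per source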
ReaderHolder createReader(); @@ -235,6 +240,9 @@ private: std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); + + void addNumRowsToCache(const String & key, size_t num_rows); + std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); }; /** @@ -322,53 +330,8 @@ public: const std::optional & format_settings, ContextPtr ctx); -protected: - virtual Configuration updateConfigurationAndGetCopy(ContextPtr local_context); - - virtual void updateConfiguration(ContextPtr local_context); - - void useConfiguration(const Configuration & new_configuration); - - const Configuration & getConfiguration(); - -private: - friend class StorageS3Cluster; - friend class TableFunctionS3Cluster; - - Configuration configuration; - std::mutex configuration_update_mutex; - NamesAndTypesList virtual_columns; - Block virtual_block; - - String name; - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; - using KeysWithInfo = StorageS3Source::KeysWithInfo; - static std::shared_ptr createFileIterator( - const Configuration & configuration, - bool distributed_processing, - ContextPtr local_context, - ASTPtr query, - const Block & virtual_block, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); - - static ColumnsDescription getTableStructureFromDataImpl( - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx); - - bool supportsSubcolumns() const override; - - bool supportsSubsetOfColumns() const override; - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; - static std::optional tryGetColumnsFromCache( const KeysWithInfo::const_iterator & begin, const KeysWithInfo::const_iterator & end, @@ -383,6 +346,53 @@ private: const String & format_name, const std::optional & format_settings, const ContextPtr & ctx); + + bool supportsTrivialCountOptimization() const override { return true; } + +protected: + virtual Configuration updateConfigurationAndGetCopy(ContextPtr local_context); + + virtual void updateConfiguration(ContextPtr local_context); + + void useConfiguration(const Configuration & new_configuration); + + const Configuration & getConfiguration(); + +private: + friend class StorageS3Cluster; + friend class TableFunctionS3Cluster; + friend class StorageS3Queue; + + Configuration configuration; + std::mutex configuration_update_mutex; + NamesAndTypesList virtual_columns; + + String name; + const bool distributed_processing; + std::optional format_settings; + ASTPtr partition_by; + + static std::shared_ptr createFileIterator( + const Configuration & configuration, + bool distributed_processing, + ContextPtr local_context, + ASTPtr query, + const NamesAndTypesList & virtual_columns, + KeysWithInfo * read_keys = nullptr, + std::function progress_callback = {}); + + static ColumnsDescription getTableStructureFromDataImpl( + const Configuration & configuration, + const std::optional & format_settings, + ContextPtr ctx); + + bool supportsSubcolumns() const override { return true; } + + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool prefersLargeBlocks() const override; + + bool parallelizeOutputAfterReading(ContextPtr context) const override; }; } diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 646fa59b33b..c8715938c6f 100644 --- 
a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -61,14 +61,7 @@ StorageS3Cluster::StorageS3Cluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - auto default_virtuals = NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; - - auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); - virtual_columns = getVirtualsForStorage(columns, default_virtuals); - for (const auto & column : virtual_columns) - virtual_block.insert({column.type->createColumn(), column.type, column.name}); + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) @@ -88,7 +81,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, query, virtual_block, context); + *s3_configuration.client, s3_configuration.url, query, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next().key; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 5c2229875e5..81fb48d2398 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,6 +36,10 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + protected: void updateConfigurationIfChanged(ContextPtr local_context); @@ -46,7 +50,6 @@ private: StorageS3::Configuration s3_configuration; NamesAndTypesList virtual_columns; - Block virtual_block; }; diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 581665a7dc5..e3d577ca0b3 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -77,6 +77,8 @@ struct S3Settings const PartUploadSettings & getUploadSettings() const { return upload_settings; } + void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } + RequestSettings() = default; explicit RequestSettings(const Settings & settings); explicit RequestSettings(const NamedCollection & collection); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index d5ae6f2383f..d5db5763da9 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -91,6 +91,7 @@ Pipe StorageSQLite::read( column_names, storage_snapshot->metadata->getColumns().getOrdinary(), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "", remote_table_name, context_); diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 31770c9a32b..c0e85900794 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -113,22 +113,15 @@ 
NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & column_name : column_names) { - std::string substituted_column_name = column_name; - - /// substituted_column_name is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - if (!parameter_values.empty()) - substituted_column_name = StorageView::replaceValueWithQueryParameter(column_name, parameter_values); - - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); + auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, column_name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, column_name); if (column && !object_column) { res.insert({column->type->createColumn(), column->type, column_name}); @@ -147,7 +140,7 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, cons else { throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(substituted_column_name), storage.getStorageID().getNameForLogs()); + "Column {} not found in table {}", backQuote(column_name), storage.getStorageID().getNameForLogs()); } } return res; diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index 946e8a98cf2..a69f9b95955 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -71,7 +71,7 @@ struct StorageSnapshot NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; /// Block with ordinary + materialized + aliases + virtuals + subcolumns. 
- Block getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values = {}) const; + Block getSampleBlockForColumns(const Names & column_names) const; ColumnsDescription getDescriptionForColumns(const Names & column_names) const; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index d8bbd523cbf..83336cbd22e 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -426,7 +426,7 @@ void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont num_indices_saved = 0; total_rows = 0; total_bytes = 0; - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); } @@ -544,9 +544,10 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec fs::path data_path_in_backup_fs = data_path_in_backup; auto temp_dir_owner = std::make_shared(disk, "tmp/"); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// data.bin @@ -576,7 +577,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 26cbe1f0233..8f96cb46910 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -79,11 +79,11 @@ public: nested->shutdown(); } - void flush() override + void flushAndPrepareForShutdown() override { std::lock_guard lock{nested_mutex}; if (nested) - nested->flush(); + nested->flushAndPrepareForShutdown(); } void drop() override @@ -153,10 +153,10 @@ public: } bool isView() const override { return false; } - void checkTableCanBeDropped() const override {} + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override {} private: - mutable std::mutex nested_mutex; + mutable std::recursive_mutex nested_mutex; mutable GetNestedStorageFunc get_nested; mutable StoragePtr nested; const bool add_conversion; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index b70a7de7909..a0f5379a1fd 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1,9 +1,8 @@ #include -#include #include #include #include -#include +#include #include #include @@ -23,10 +22,16 @@ #include #include #include +#include +#include +#include +#include #include #include #include +#include +#include #include #include @@ -38,6 +43,10 @@ #include #include +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} namespace DB { @@ -125,6 +134,8 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } @@ -153,14 +164,48 @@ namespace { return parseRemoteDescription(uri, 
0, uri.size(), '|', max_addresses); } + + auto proxyConfigurationToPocoProxyConfiguration(const ProxyConfiguration & proxy_configuration) + { + Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config; + + poco_proxy_config.host = proxy_configuration.host; + poco_proxy_config.port = proxy_configuration.port; + poco_proxy_config.protocol = ProxyConfiguration::protocolToString(proxy_configuration.protocol); + + return poco_proxy_config; + } + + auto getProxyConfiguration(const std::string & protocol_string) + { + auto protocol = protocol_string == "https" ? ProxyConfigurationResolver::Protocol::HTTPS + : ProxyConfigurationResolver::Protocol::HTTP; + auto proxy_config = ProxyConfigurationResolverProvider::get(protocol, Context::getGlobalContextInstance()->getConfigRef())->resolve(); + + return proxyConfigurationToPocoProxyConfiguration(proxy_config); + } } class StorageURLSource::DisclosedGlobIterator::Impl { public: - Impl(const String & uri, size_t max_addresses) + Impl(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context) { - uris = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); + uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses); + + ASTPtr filter_ast; + if (!uris.empty()) + filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, Poco::URI(uris[0]).getPath(), context); + + if (filter_ast) + { + std::vector paths; + paths.reserve(uris.size()); + for (const auto & uri : uris) + paths.push_back(Poco::URI(uri).getPath()); + + VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, context, filter_ast); + } } String next() @@ -182,8 +227,8 @@ private: std::atomic_size_t index = 0; }; -StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses) - : pimpl(std::make_shared(uri, max_addresses)) {} +StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context) + : pimpl(std::make_shared(uri, max_addresses, query, virtual_columns, context)) {} String StorageURLSource::DisclosedGlobIterator::next() { @@ -209,36 +254,36 @@ void StorageURLSource::setCredentials(Poco::Net::HTTPBasicCredentials & credenti } } -Block StorageURLSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageURLSource::StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, - const String & format, - const std::optional & format_settings, + const String & format_, + const std::optional & format_settings_, String name_, - const Block & sample_block, - ContextPtr context, - const ColumnsDescription & columns, + ContextPtr context_, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, - size_t download_threads, + size_t max_parsing_threads, + const SelectQueryInfo & query_info, const HTTPHeaderEntries & headers_, const URIParams & params, - bool glob_url) - : ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), 
requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) + bool glob_url, + bool need_only_count_) + : ISource(info.source_header, false), WithContext(context_) + , name(std::move(name_)) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) + , uri_iterator(uri_iterator_) + , format(format_) + , format_settings(format_settings_) + , headers(getHeaders(headers_)) + , need_only_count(need_only_count_) { - auto headers = getHeaders(headers_); - /// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline. initialize = [=, this]() { @@ -254,7 +299,7 @@ StorageURLSource::StorageURLSource( uri_and_buf = getFirstAvailableURIAndReadBuffer( first_option, current_uri_options.end(), - context, + getContext(), params, http_method, callback, @@ -266,50 +311,74 @@ StorageURLSource::StorageURLSource( /// If file is empty and engine_url_skip_empty_files=1, skip it and go to the next file. } - while (context->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof()); + while (getContext()->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof()); curr_uri = uri_and_buf.first; + auto last_mod_time = uri_and_buf.second->tryGetLastModificationTime(); read_buf = std::move(uri_and_buf.second); - size_t file_size = 0; - try + if (auto file_progress_callback = getContext()->getFileProgressCallback()) { - file_size = getFileSizeFromReadBuffer(*read_buf); + size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); + file_progress_callback(FileProgress(0, file_size)); } - catch (...) - { - // we simply continue without updating total_size - } - - if (file_size) - { - /// Adjust total_rows_approx_accumulated with new total size. - if (total_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_size + file_size) / total_size * total_rows_approx_accumulated)); - total_size += file_size; - } - - // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. - input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - context, - max_block_size, - format_settings, - download_threads, - /*max_download_threads*/ std::nullopt, - /* is_remote_fs */ true, - compression_method); QueryPipelineBuilder builder; - builder.init(Pipe(input_format)); + std::optional num_rows_from_cache = std::nullopt; + if (need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files) + num_rows_from_cache = tryGetNumRowsFromCache(curr_uri.toString(), last_mod_time); - builder.addSimpleTransform([&](const Block & cur_header) - { return std::make_shared(cur_header, columns, *input_format, context); }); + if (num_rows_from_cache) + { + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. + auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); + builder.init(Pipe(source)); + } + else + { + // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. 
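
The comment introducing ConstChunkGenerator above explains why a cached row count is emitted as a stream of bounded chunks rather than one oversized chunk. A minimal standalone sketch of that chunking rule (plain C++ with illustrative names, not ClickHouse's ConstChunkGenerator):

#include <algorithm>
#include <cstddef>
#include <vector>

// Emit successive chunk sizes of at most `max_block_size` until `total_rows`
// is exhausted, instead of materializing one huge chunk (assumes max_block_size > 0).
static std::vector<size_t> planConstChunks(size_t total_rows, size_t max_block_size)
{
    std::vector<size_t> chunk_sizes;
    while (total_rows > 0)
    {
        const size_t rows = std::min(total_rows, max_block_size);
        chunk_sizes.push_back(rows);
        total_rows -= rows;
    }
    return chunk_sizes;
}

// e.g. planConstChunks(250000, 65536) -> {65536, 65536, 65536, 53392}
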
+ input_format = FormatFactory::instance().getInput( + format, + *read_buf, + block_for_format, + getContext(), + max_block_size, + format_settings, + need_only_count ? 1 : max_parsing_threads, + /*max_download_threads*/ std::nullopt, + /* is_remote_ fs */ true, + compression_method); + input_format->setQueryInfo(query_info, getContext()); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(cur_header, columns_description, *input_format, getContext()); + }); + } + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); return true; }; } @@ -332,38 +401,23 @@ Chunk StorageURLSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); - if (num_rows && total_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - - const String & path{curr_uri.getPath()}; - - for (const auto & virtual_column : requested_virtual_columns) - { - if (virtual_column.name == "_path") - { - chunk.addColumn(virtual_column.type->createColumnConst(num_rows, path)->convertToFullColumnIfConst()); - } - else if (virtual_column.name == "_file") - { - size_t last_slash_pos = path.find_last_of('/'); - auto column = virtual_column.type->createColumnConst(num_rows, path.substr(last_slash_pos + 1)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - } - + total_rows_in_file += num_rows; + size_t chunk_size = 0; + if (input_format) + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, curr_uri.getPath()); return chunk; } + if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(curr_uri.toString(), total_rows_in_file); + pipeline->reset(); reader.reset(); input_format.reset(); read_buf.reset(); + total_rows_in_file = 0; } return {}; } @@ -371,7 +425,7 @@ Chunk StorageURLSource::generate() std::pair> StorageURLSource::getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, - ContextPtr context, + ContextPtr context_, const URIParams & params, const String & http_method, std::function callback, @@ -382,21 +436,23 @@ std::pair> StorageURLSource: bool delay_initialization) { String first_exception_message; - ReadSettings read_settings = context->getReadSettings(); + ReadSettings read_settings = context_->getReadSettings(); size_t options = std::distance(option, end); std::pair> last_skipped_empty_res; for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option); + auto request_uri = Poco::URI(*option, context_->getSettingsRef().enable_url_encoding); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); setCredentials(credentials, request_uri); - const auto settings = context->getSettings(); + const auto settings = context_->getSettings(); + + auto proxy_config = getProxyConfiguration(http_method); try { @@ -410,12 +466,14 @@ std::pair> StorageURLSource: settings.max_read_buffer_size, read_settings, headers, - &context->getRemoteHostFilter(), + &context_->getRemoteHostFilter(), delay_initialization, /* use_external_buffer */ false, - /* skip_url_not_found_error */ skip_url_not_found_error); + /* skip_url_not_found_error */ skip_url_not_found_error, + /* file_info */ std::nullopt, + proxy_config); - if (context->getSettingsRef().engine_url_skip_empty_files && res->eof() && option != std::prev(end)) + if (context_->getSettingsRef().engine_url_skip_empty_files && res->eof() && option != std::prev(end)) { last_skipped_empty_res = {request_uri, std::move(res)}; continue; @@ -445,6 +503,28 @@ std::pair> StorageURLSource: throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", options, first_exception_message); } +void StorageURLSource::addNumRowsToCache(const DB::String & uri, size_t num_rows) +{ + auto cache_key = getKeyForSchemaCache(uri, format, format_settings, getContext()); + StorageURL::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); +} + +std::optional StorageURLSource::tryGetNumRowsFromCache(const DB::String & uri, std::optional last_mod_time) +{ + auto cache_key = getKeyForSchemaCache(uri, format, format_settings, getContext()); + auto get_last_mod_time = [&]() -> std::optional + { + /// Some URLs could not have Last-Modified header, in this case we cannot be sure that + /// data wasn't changed after adding it's schema to cache. Use schema from cache only if + /// special setting for this case is enabled. 
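
A rough standalone restatement of the validity rule described in the comment above (illustrative only; the real code goes through SchemaCache with a modification-time callback, as the lines right below show):

#include <ctime>
#include <optional>

// Illustrative check (equality used for simplicity): a cached entry for a URL is
// reused only when the URL still reports a Last-Modified time matching the one
// stored with the entry, or when the caller accepts entries for URLs that expose
// no modification time at all.
static bool cachedEntryUsable(std::optional<std::time_t> cached_mod_time,
                              std::optional<std::time_t> current_mod_time,
                              bool require_modification_time)
{
    if (!current_mod_time)
        return !require_modification_time;   // URL has no Last-Modified header
    return cached_mod_time && *cached_mod_time == *current_mod_time;
}
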
+ if (!last_mod_time && !getContext()->getSettingsRef().schema_inference_cache_require_modification_time_for_url) + return 0; + return last_mod_time; + }; + + return StorageURL::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); +} + StorageURLSink::StorageURLSink( const String & uri, const String & format, @@ -460,10 +540,17 @@ StorageURLSink::StorageURLSink( std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings); std::string content_encoding = toContentEncodingName(compression_method); + auto proxy_config = getProxyConfiguration(http_method); + + auto write_buffer = std::make_unique( + Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config + ); + write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts), + std::move(write_buffer), compression_method, - 3); + 3 + ); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, format_settings); } @@ -603,6 +690,83 @@ std::function IStorageURLBase::getReadPOSTDataCallback( return nullptr; } +namespace +{ + class ReadBufferIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferIterator( + const std::vector & urls_to_check_, + const String & format_, + const CompressionMethod & compression_method_, + const HTTPHeaderEntries & headers_, + const std::optional & format_settings_, + const ContextPtr & context_) + : WithContext(context_), urls_to_check(urls_to_check_), format(format_), compression_method(compression_method_), headers(headers_), format_settings(format_settings_) + { + it = urls_to_check.cbegin(); + } + + std::unique_ptr next() override + { + std::pair> uri_and_buf; + do + { + if (it == urls_to_check.cend()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. 
" + "You must specify table structure manually", + format); + return nullptr; + } + + uri_and_buf = StorageURLSource::getFirstAvailableURIAndReadBuffer( + it, + urls_to_check.cend(), + getContext(), + {}, + Poco::Net::HTTPRequest::HTTP_GET, + {}, + getHTTPTimeouts(getContext()), + credentials, + headers, + false, + false); + + ++it; + } while (getContext()->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof()); + + first = false; + return wrapReadBufferWithCompressionMethod( + std::move(uri_and_buf.second), + compression_method, + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + return; + + String source = *std::prev(it); + auto key = getKeyForSchemaCache(source, format, format_settings, getContext()); + StorageURL::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + private: + const std::vector & urls_to_check; + std::vector::const_iterator it; + const String & format; + const CompressionMethod & compression_method; + const HTTPHeaderEntries & headers; + Poco::Net::HTTPBasicCredentials credentials; + const std::optional & format_settings; + bool first = true; + }; +} ColumnsDescription IStorageURLBase::getTableStructureFromData( const String & format, @@ -636,50 +800,16 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( if (context->getSettingsRef().schema_inference_use_cache_for_url) columns_from_cache = tryGetColumnsFromCache(urls_to_check, headers, credentials, format, format_settings, context); - ReadBufferIterator read_buffer_iterator = [&, it = urls_to_check.cbegin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr - { - std::pair> uri_and_buf; - do - { - if (it == urls_to_check.cend()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. 
" - "You must specify table structure manually", - format); - return nullptr; - } - - uri_and_buf = StorageURLSource::getFirstAvailableURIAndReadBuffer( - it, - urls_to_check.cend(), - context, - {}, - Poco::Net::HTTPRequest::HTTP_GET, - {}, - getHTTPTimeouts(context), - credentials, - headers, - false, - false); - - ++it; - } while (context->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof()); - - first = false; - return wrapReadBufferWithCompressionMethod( - std::move(uri_and_buf.second), - compression_method, - static_cast(context->getSettingsRef().zstd_window_log_max)); - }; - ColumnsDescription columns; if (columns_from_cache) + { columns = *columns_from_cache; + } else + { + ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context); columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context); + } if (context->getSettingsRef().schema_inference_use_cache_for_url) addColumnsToCache(urls_to_check, columns, format, format_settings, context); @@ -687,9 +817,9 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( return columns; } -bool IStorageURLBase::supportsSubsetOfColumns() const +bool IStorageURLBase::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings); } bool IStorageURLBase::prefersLargeBlocks() const @@ -713,32 +843,11 @@ Pipe IStorageURLBase::read( { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - - size_t max_download_threads = local_context->getSettingsRef().max_download_threads; - std::shared_ptr iterator_wrapper{nullptr}; bool is_url_with_globs = urlWithGlobs(uri); size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + if (distributed_processing) { iterator_wrapper = std::make_shared( @@ -753,7 +862,12 @@ Pipe IStorageURLBase::read( else if (is_url_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(uri, max_addresses); + auto glob_iterator = std::make_shared(uri, max_addresses, query_info.query, virtual_columns, local_context); + + /// check if we filtered out all the paths + if (glob_iterator->size() == 0) + return Pipe(std::make_shared(read_from_format_info.source_header)); + iterator_wrapper = std::make_shared([glob_iterator, max_addresses]() { String next_uri = glob_iterator->next(); 
@@ -777,30 +891,41 @@ Pipe IStorageURLBase::read( num_streams = 1; } + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; + Pipes pipes; pipes.reserve(num_streams); - size_t download_threads = num_streams >= max_download_threads ? 1 : (max_download_threads / num_streams); + const size_t max_threads = local_context->getSettingsRef().max_threads; + const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams); + for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback( + read_from_format_info.columns_description.getNamesOfPhysical(), + read_from_format_info.columns_description, + query_info, + local_context, + processed_stage, + max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, - download_threads, + max_parsing_threads, + query_info, headers, params, - is_url_with_globs)); + is_url_with_globs, + need_only_count)); } return Pipe::unitePipes(std::move(pipes)); @@ -814,21 +939,8 @@ Pipe StorageURLWithFailover::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - size_t /*num_streams*/) + size_t num_streams) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); auto iterator_wrapper = std::make_shared([&, done = false]() mutable @@ -839,21 +951,25 @@ Pipe StorageURLWithFailover::read( return uri_options; }); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + + const size_t max_threads = local_context->getSettingsRef().max_threads; + const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / num_streams); + auto pipe = Pipe(std::make_shared( - std::vector{}, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(read_from_format_info.columns_description.getNamesOfPhysical(), read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, - local_context->getSettingsRef().max_download_threads, + max_parsing_threads, + query_info, headers, params)); std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng); @@ -902,9 +1018,7 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad NamesAndTypesList IStorageURLBase::getVirtuals() const { - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; + return virtual_columns; } SchemaCache & IStorageURLBase::getSchemaCache(const ContextPtr & context) @@ -926,7 +1040,7 @@ std::optional IStorageURLBase::tryGetColumnsFromCache( { auto get_last_mod_time = [&]() -> std::optional { - auto last_mod_time = getLastModificationTime(url, headers, credentials, context); + auto last_mod_time = tryGetLastModificationTime(url, headers, credentials, context); /// Some URLs could not have Last-Modified header, in this case we cannot be sure that /// data wasn't changed after adding it's schema to cache. Use schema from cache only if /// special setting for this case is enabled. @@ -936,7 +1050,7 @@ std::optional IStorageURLBase::tryGetColumnsFromCache( }; auto cache_key = getKeyForSchemaCache(url, format_name, format_settings, context); - auto columns = schema_cache.tryGet(cache_key, get_last_mod_time); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); if (columns) return columns; } @@ -953,10 +1067,10 @@ void IStorageURLBase::addColumnsToCache( { auto & schema_cache = getSchemaCache(context); auto cache_keys = getKeysForSchemaCache(urls, format_name, format_settings, context); - schema_cache.addMany(cache_keys, columns); + schema_cache.addManyColumns(cache_keys, columns); } -std::optional IStorageURLBase::getLastModificationTime( +std::optional IStorageURLBase::tryGetLastModificationTime( const String & url, const HTTPHeaderEntries & headers, const Poco::Net::HTTPBasicCredentials & credentials, @@ -964,29 +1078,28 @@ std::optional IStorageURLBase::getLastModificationTime( { auto settings = context->getSettingsRef(); - try - { - ReadWriteBufferFromHTTP buf( - Poco::URI(url), - Poco::Net::HTTPRequest::HTTP_GET, - {}, - getHTTPTimeouts(context), - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - context->getReadSettings(), - headers, - &context->getRemoteHostFilter(), - true, - false, - false); + auto uri = Poco::URI(url); - return buf.getLastModificationTime(); - } - catch (...) 
- { - return std::nullopt; - } + auto proxy_config = getProxyConfiguration(uri.getScheme()); + + ReadWriteBufferFromHTTP buf( + uri, + Poco::Net::HTTPRequest::HTTP_GET, + {}, + getHTTPTimeouts(context), + credentials, + settings.max_http_get_redirects, + settings.max_read_buffer_size, + context->getReadSettings(), + headers, + &context->getRemoteHostFilter(), + true, + false, + false, + std::nullopt, + proxy_config); + + return buf.tryGetLastModificationTime(); } StorageURL::StorageURL( diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 345f813dd7c..6c5c50af326 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -1,16 +1,17 @@ #pragma once -#include -#include -#include #include #include -#include #include -#include -#include +#include +#include +#include #include +#include #include +#include +#include +#include namespace DB @@ -58,6 +59,12 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context); + static std::optional tryGetLastModificationTime( + const String & url, + const HTTPHeaderEntries & headers, + const Poco::Net::HTTPBasicCredentials & credentials, + const ContextPtr & context); + protected: IStorageURLBase( const String & uri_, @@ -87,6 +94,8 @@ protected: ASTPtr partition_by; bool distributed_processing; + NamesAndTypesList virtual_columns; + virtual std::string getReadMethod() const; virtual std::vector> getReadURIParams( @@ -105,12 +114,14 @@ protected: QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; - bool supportsSubsetOfColumns() const override; + virtual bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; + bool supportsTrivialCountOptimization() const override { return true; } + private: virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; @@ -128,16 +139,10 @@ private: const String & format_name, const std::optional & format_settings, const ContextPtr & context); - - static std::optional getLastModificationTime( - const String & url, - const HTTPHeaderEntries & headers, - const Poco::Net::HTTPBasicCredentials & credentials, - const ContextPtr & context); }; -class StorageURLSource : public ISource +class StorageURLSource : public ISource, WithContext { using URIParams = std::vector>; @@ -145,7 +150,8 @@ public: class DisclosedGlobIterator { public: - DisclosedGlobIterator(const String & uri_, size_t max_addresses); + DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context); + String next(); size_t size(); private: @@ -158,23 +164,23 @@ public: using IteratorWrapper = std::function; StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, - size_t download_threads, + size_t max_parsing_threads, + const SelectQueryInfo & query_info, const HTTPHeaderEntries & headers_ = {}, const URIParams & params = {}, - bool glob_url = false); + bool glob_url = false, + bool need_only_count_ = false); String getName() const 
override { return name; } @@ -182,8 +188,6 @@ public: static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri); - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - static std::pair> getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, @@ -198,13 +202,24 @@ public: bool delay_initialization); private: + void addNumRowsToCache(const String & uri, size_t num_rows); + std::optional tryGetNumRowsFromCache(const String & uri, std::optional last_mod_time); + using InitializeFunc = std::function; InitializeFunc initialize; String name; - std::vector requested_virtual_columns; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; + Block block_for_format; std::shared_ptr uri_iterator; Poco::URI curr_uri; + String format; + const std::optional & format_settings; + HTTPHeaderEntries headers; + bool need_only_count; + size_t total_rows_in_file = 0; std::unique_ptr read_buf; std::shared_ptr input_format; @@ -212,11 +227,6 @@ private: std::unique_ptr reader; Poco::Net::HTTPBasicCredentials credentials; - - size_t total_size = 0; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; }; class StorageURLSink : public SinkToStorage @@ -276,6 +286,8 @@ public: return storage_snapshot->metadata->getSampleBlock(); } + bool supportsSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 8804afb7af2..5c2108bef33 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -67,6 +68,8 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) @@ -78,18 +81,11 @@ void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context); } -RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const +RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements); + auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, query, virtual_columns, context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } -NamesAndTypesList StorageURLCluster::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - } diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 
67771416771..ddf7e6f0790 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -32,16 +32,21 @@ public: std::string getName() const override { return "URLCluster"; } - NamesAndTypesList getVirtuals() const override; + NamesAndTypesList getVirtuals() const override { return virtual_columns; } RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; String uri; String format_name; String compression_method; + NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 470def7e197..f0f9b9540de 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -107,7 +107,8 @@ StorageView::StorageView( const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - const String & comment) + const String & comment, + const bool is_parameterized_view_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -123,8 +124,7 @@ StorageView::StorageView( NormalizeSelectWithUnionQueryVisitor::Data data{SetOperationMode::Unspecified}; NormalizeSelectWithUnionQueryVisitor{data}.visit(description.inner_query); - is_parameterized_view = query.isParameterizedView(); - view_parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); + is_parameterized_view = is_parameterized_view_ || query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,7 +173,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. 
- const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names, query_info.parameterized_view_values); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); @@ -258,42 +258,6 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ child = view_query; } -String StorageView::replaceQueryParameterWithValue(const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types) -{ - std::string name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = parameter_types.find(parameter.first); - size_t parameter_end = pos + parameter.first.size(); - if (parameter_datatype_iterator != parameter_types.end() && name.size() >= parameter_end && (name[parameter_end] == ',' || name[parameter_end] == ')')) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } - return name; -} - -String StorageView::replaceValueWithQueryParameter(const String & column_name, const NameToNameMap & parameter_values) -{ - String name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = name.find("_CAST(" + parameter.second)) != std::string::npos) - { - name = name.substr(0,pos) + parameter.first + ")"; - break; - } - } - return name; -} - ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(select_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index bebecb79ec0..b8bf5585c0f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -15,7 +15,8 @@ public: const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - const String & comment); + const String & comment, + const bool is_parameterized_view_=false); std::string getName() const override { return "View"; } bool isView() const override { return true; } @@ -44,17 +45,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); - static String replaceQueryParameterWithValue (const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types); - static String replaceValueWithQueryParameter (const String & column_name, const NameToNameMap & parameter_values); - - const NameToNameMap & getParameterTypes() const - { - return view_parameter_types; - } protected: bool is_parameterized_view; - NameToNameMap view_parameter_types; }; } diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index b532d1c91f0..0ba8838d4c3 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -79,6 +79,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( column_names, columns_description.getOrdinary(), bridge_helper->getIdentifierQuotingStyle(), + LiteralEscapingStyle::Regular, remote_database_name, remote_table_name, local_context); @@ -145,7 +146,7 @@ 
SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMet compression_method); } -bool StorageXDBC::supportsSubsetOfColumns() const +bool StorageXDBC::supportsSubsetOfColumns(const ContextPtr &) const { return true; } diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index d7a1138c710..1c1651cb333 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -68,7 +68,7 @@ private: Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const override; - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr &) const override; }; } diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 1d2a3de5101..c3a2e726365 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -30,7 +30,6 @@ endif() add_dependencies(generate-source generate-contributors) set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp") -set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp") add_custom_command( OUTPUT StorageSystemLicenses.generated.cpp @@ -38,23 +37,13 @@ add_custom_command( WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC}) -list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) -clickhouse_embed_binaries( - TARGET information_schema_metadata - RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" - RESOURCES schemata.sql tables.sql views.sql columns.sql -) - list (SORT storages_system_sources) # Reproducible build add_library(clickhouse_storages_system ${storages_system_sources}) -add_dependencies(clickhouse_storages_system information_schema_metadata) - target_link_libraries(clickhouse_storages_system PRIVATE dbms common @@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE clickhouse_common_zookeeper clickhouse_parsers Poco::JSON - INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" ) + +target_include_directories(clickhouse_storages_system PRIVATE InformationSchema) diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 63b9a443f95..e09b27adf32 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -30,6 +32,8 @@ class IStorageSystemOneBlock : public IStorage protected: virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; + virtual bool supportsColumnsMask() const { return false; } + public: explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { @@ -48,8 +52,15 @@ public: size_t /*num_streams*/) override { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + + if (supportsColumnsMask()) + { + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + query_info.columns_mask = std::move(columns_mask); + sample_block = std::move(header); + } + MutableColumns res_columns = sample_block.cloneEmptyColumns(); fillData(res_columns, context, query_info); diff --git 
a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 8e968f8f7c0..6fac9b04885 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -51,7 +51,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con auto & column_num_read_files = assert_cast(*res_columns[column_index++]); auto & column_num_read_bytes = assert_cast(*res_columns[column_index++]); - auto add_row = [&](const BackupsWorker::Info & info) + auto add_row = [&](const BackupOperationInfo & info) { column_id.insertData(info.id.data(), info.id.size()); column_name.insertData(info.name.data(), info.name.size()); diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 4e7a25d7726..796b134ba56 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -8,7 +8,6 @@ const char * auto_config_build[] "SYSTEM", "@CMAKE_SYSTEM_NAME@", "VERSION_GITHASH", "@VERSION_GITHASH@", "VERSION_REVISION", "@VERSION_REVISION@", - "VERSION_DATE", "@VERSION_DATE@", "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM_PROCESSOR", "@CMAKE_SYSTEM_PROCESSOR@", "CMAKE_VERSION", "@CMAKE_VERSION@", diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index f4ef52d7605..39a61f22b89 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -32,6 +32,12 @@ NamesAndTypesList StorageSystemClusters::getNamesAndTypes() }; } +NamesAndAliases StorageSystemClusters::getNamesAndAliases() +{ + return { + {"name", std::make_shared(), "cluster"}, + }; +} void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 9aa1a6a5183..071ad423b89 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -22,6 +22,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; using NameAndCluster = std::pair>; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index e4ca6a15138..b76ad07abeb 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -315,23 +316,9 @@ Pipe StorageSystemColumns::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. 
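
The removed inline loops below are what the shared getQueriedColumnsMaskAndHeader helper now provides for this and the other system tables in this patch. A std-only sketch of the factored-out logic (std types stand in for Block/NameSet):

#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

// Given the ordered sample-block column names and the requested names, build a
// 0/1 mask over the sample order plus the reduced header, as the removed inline
// loops below do.
static std::pair<std::vector<uint8_t>, std::vector<std::string>>
queriedColumnsMaskAndHeader(const std::vector<std::string> & sample_columns,
                            const std::vector<std::string> & requested_names)
{
    const std::unordered_set<std::string> requested(requested_names.begin(), requested_names.end());
    std::vector<uint8_t> mask(sample_columns.size(), 0);
    std::vector<std::string> header;
    for (size_t i = 0; i < sample_columns.size(); ++i)
    {
        if (requested.contains(sample_columns[i]))
        {
            mask[i] = 1;
            header.push_back(sample_columns[i]);
        }
    }
    return {std::move(mask), std::move(header)};
}

// e.g. queriedColumnsMaskAndHeader({"database", "table", "name"}, {"name", "database"})
//      -> mask {1, 0, 1}, header {"database", "name"}
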
- - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); Block block_to_filter; Storages storages; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f84c554afc0..d4948443607 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -20,6 +20,7 @@ const char * auto_contributors[] { "Ahmed Dardery", "Aimiyoo", "Akazz", + "Al Korgun", "AlPerevyshin", "Alain BERRIER", "Albert Kidrachev", @@ -42,10 +43,12 @@ const char * auto_contributors[] { "Alex", "Alex Bocharov", "Alex Cao", + "Alex Cheng", "Alex Karo", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", + "AlexBykovski", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", @@ -71,6 +74,7 @@ const char * auto_contributors[] { "Alexander Tokmakov", "Alexander Tretiakov", "Alexander Yakovlev", + "Alexander Zaitsev", "Alexandr Kondratev", "Alexandr Krasheninnikov", "Alexandr Orlov", @@ -138,6 +142,7 @@ const char * auto_contributors[] { "Andrey Zvonov", "Andrii Buriachevskyi", "Andrii R", + "Andy Fiddaman", "Andy Liang", "Andy Yang", "AndyB", @@ -184,8 +189,11 @@ const char * auto_contributors[] { "Artur", "Artur Beglaryan", "Artur Filatenkov", + "Artur Malchanau", + "Ash Vardanian", "AsiaKorushkina", "Atri Sharma", + "Austin Kothig", "Avogar", "Azat Khuzhin", "BSD_Conqueror", @@ -201,9 +209,11 @@ const char * auto_contributors[] { "Bertrand Junqua", "Bharat Nallan", "Bharat Nallan Chakravarthy", + "Bhavna Jindal", "Big Elephant", "BigRedEye", "Bill", + "Bin Xie", "BiteTheDDDDt", "BlahGeek", "Bo Lu", @@ -232,6 +242,7 @@ const char * auto_contributors[] { "CheSema", "Chebarykov Pavel", "Chen Yufei", + "Chen768959", "Cheng Pan", "Chienlung Cheung", "Christian", @@ -256,12 +267,15 @@ const char * auto_contributors[] { "Dalitso Banda", "Dan Roscigno", "DanRoscigno", + "Dani Pozo", "Daniel Bershatsky", "Daniel Dao", "Daniel Kutenin", + "Daniel Pozo Escalona", "Daniel Qin", "Daniil Rubin", "Danila Kutenin", + "Daniël van Eeden", "Dao", "Dao Minh Thuc", "Daria Mozhaeva", @@ -269,6 +283,7 @@ const char * auto_contributors[] { "DarkWanderer", "Darío", "Dave Lahn", + "Davit Vardanyan", "Denis Burlaka", "Denis Glazachev", "Denis Krivak", @@ -358,6 +373,7 @@ const char * auto_contributors[] { "FgoDt", "Filatenkov Artur", "Filipe Caixeta", + "Filipp Ozinov", "Filippov Denis", "Flowyi", "Francisco Barón", @@ -408,6 +424,7 @@ const char * auto_contributors[] { "Hasnat", "Heena Bansal", "HeenaBansal2009", + "Hendrik M", "Hiroaki Nakamura", "Hongbin", "Hongbin Ma", @@ -460,6 +477,7 @@ const char * auto_contributors[] { "JackyWoo", "Jacob Hayes", "Jacob Herrington", + "Jai Jhala", "Jake Bamrah", "Jake Liu", "Jakub Kuklis", @@ -475,16 +493,21 @@ const char * auto_contributors[] { "Jean Baptiste Favre", "Jeffrey Dang", "Jiading Guo", + "Jianfei Hu", "Jiang Tao", + "Jiang Yuqing", "Jianmei Zhang", "Jiebin Sun", + "Jiyoung Yoo", "Joanna Hulboj", "Jochen Schalanda", + "Joe Lynch", "Joey", "Johannes Visintini", "John", "John Hummel", "John 
Skopis", + "John Spurlock", "Jonatas Freitas", "Jonathan-Ackerman", "Jordi", @@ -507,6 +530,7 @@ const char * auto_contributors[] { "Keiji Yoshida", "Ken Chen", "Ken MacInnis", + "Kenji Noguchi", "Kerry Clendinning", "Kevin Chiang", "Kevin Michel", @@ -533,6 +557,7 @@ const char * auto_contributors[] { "Korviakov Andrey", "Kostiantyn Storozhuk", "Kozlov Ivan", + "Krisztián Szűcs", "Kruglov Pavel", "Krzysztof Góralski", "Kseniia Sumarokova", @@ -565,6 +590,7 @@ const char * auto_contributors[] { "Lorenzo Mangani", "Loud_Scream", "Lucas Chang", + "Lucas Fernando Cardoso Nunes", "Lucid Dreams", "Luck-Chang", "Luis Bosque", @@ -623,6 +649,7 @@ const char * auto_contributors[] { "Maxim Smirnov", "Maxim Ulanovskiy", "MaximAL", + "Maximilian Roos", "Mc.Spring", "Meena Renganathan", "Meena-Renganathan", @@ -659,6 +686,7 @@ const char * auto_contributors[] { "Mikhail Gaidamaka", "Mikhail Guzov", "Mikhail Korotov", + "Mikhail Koviazin", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", @@ -702,6 +730,7 @@ const char * auto_contributors[] { "Nikhil Raman", "Nikifor Seriakov", "Nikita", + "Nikita Keba", "Nikita Lapkov", "Nikita Mikhailov", "Nikita Mikhalev", @@ -765,6 +794,7 @@ const char * auto_contributors[] { "Peignon Melvyn", "Peng Jian", "Peng Liu", + "Pengyuan Bian", "Persiyanov Dmitriy Andreevich", "Pervakov Grigorii", "Pervakov Grigory", @@ -815,8 +845,10 @@ const char * auto_contributors[] { "Roman Vasin", "Roman Vlasenko", "Roman Zhukov", + "Rory Crispin", "Roy Bellingan", "Ruslan", + "Ruslan Mardugalliamov", "Ruslan Savchenko", "Russ Frank", "Ruzal Ibragimov", @@ -832,12 +864,16 @@ const char * auto_contributors[] { "Salvatore Mesoraca", "Sami Kerola", "Samuel Chou", + "Samuel Colvin", "San", + "Sanjam Panda", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", + "Selfuppen", "Sema Checherinda", "Serg Kulakov", + "Serge Klochkov", "Serge Rider", "Sergei Bocharov", "Sergei Semin", @@ -848,6 +884,7 @@ const char * auto_contributors[] { "Sergey Demurin", "Sergey Elantsev", "Sergey Fedorov", + "Sergey Katkovskiy", "Sergey Kazmin", "Sergey Kislov", "Sergey Kononenko", @@ -883,6 +920,7 @@ const char * auto_contributors[] { "SmitaRKulkarni", "Snow", "Sofia Antipushina", + "Song Liyong", "Sorck", "Stanislav Dobrovolschii", "Stanislav Pavlovichev", @@ -893,6 +931,7 @@ const char * auto_contributors[] { "Stepan Herold", "Stephan", "Steve-金勇", + "StianBerger", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", @@ -909,6 +948,7 @@ const char * auto_contributors[] { "Tagir Kuskarov", "Tai White", "Taleh Zaliyev", + "Tanay Tummalapalli", "Tangaev", "Tanya Bragin", "Tatiana", @@ -954,6 +994,7 @@ const char * auto_contributors[] { "Val Doroshchuk", "Valentin Alexeev", "Valera Ryaboshapko", + "VanDarkholme7", "Varinara", "Vasily Kozhukhovskiy", "Vasily Morozov", @@ -977,6 +1018,7 @@ const char * auto_contributors[] { "Vitaliy Karnienko", "Vitaliy Kozlovskiy", "Vitaliy Lyudvichenko", + "Vitaliy Pashkov", "Vitaliy Zakaznikov", "Vitaly", "Vitaly Artemyev", @@ -1029,6 +1071,7 @@ const char * auto_contributors[] { "Yakov Olkhovskiy", "YalalovSM", "Yangkuan Liu", + "Yarik Briukhovetskyi", "Yatian Xu", "Yatsishin Ilya", "Yağızcan Değirmenci", @@ -1050,9 +1093,11 @@ const char * auto_contributors[] { "Yuriy Baranov", "Yuriy Chernyshov", "Yuriy Korzhenevskiy", + "Yury Bogomolov", "Yury Karpovich", "Yury Stankevich", "Yusuke Tanaka", + "Zach Naimon", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1136,6 +1181,7 @@ const char * auto_contributors[] { "caspian", "cekc", "centos7", + "cfanbo", "cfcz48", 
"cgp", "champtar", @@ -1143,6 +1189,7 @@ const char * auto_contributors[] { "changvvb", "chasingegg", "chen", + "chen768959", "chen9t", "chengy8934", "chenjian", @@ -1163,6 +1210,7 @@ const char * auto_contributors[] { "cnmade", "comunodi", "congbaoyangrou", + "copperybean", "coraxster", "cwkyaoyao", "d.v.semenov", @@ -1172,6 +1220,7 @@ const char * auto_contributors[] { "daoready", "darkkeks", "dasmfm", + "daviddhc20120601", "davydovska", "decaseal", "dependabot-preview[bot]", @@ -1179,6 +1228,7 @@ const char * auto_contributors[] { "detailyang", "dfenelonov", "dgrr", + "dheerajathrey", "dimarub2000", "dinosaur", "divanorama", @@ -1193,6 +1243,7 @@ const char * auto_contributors[] { "eaxdev", "eejoin", "egatov", + "ekrasikov", "elBroom", "elenaspb2019", "elevankoff", @@ -1247,6 +1298,7 @@ const char * auto_contributors[] { "guov100", "guyco87", "guykohen", + "gyfis", "gyuton", "hanqf-git", "hao.he", @@ -1255,6 +1307,7 @@ const char * auto_contributors[] { "hcz", "heleihelei", "helifu", + "hendrik-m", "heng zhao", "hermano", "hexiaoting", @@ -1277,6 +1330,7 @@ const char * auto_contributors[] { "ikopylov", "imgbot[bot]", "ip", + "irenjj", "ismailakpolat", "it1804", "ivan-klass", @@ -1296,11 +1350,14 @@ const char * auto_contributors[] { "jiahui-97", "jianmei zhang", "jinjunzh", + "jiyoungyoooo", "jkuklis", "johanngan", + "jsc0218", "jthmath", "jun won", "jus1096", + "justindeguzman", "jyz0309", "karnevil13", "kashwy", @@ -1317,6 +1374,7 @@ const char * auto_contributors[] { "kolsys", "konnectr", "koshachy", + "kothiga", "kreuzerkrieg", "ks1322", "kshvakov", @@ -1329,6 +1387,7 @@ const char * auto_contributors[] { "lanfz", "larryluogit", "laurieliyang", + "lcjh", "lehasm", "leosunli", "leozhang", @@ -1435,6 +1494,7 @@ const char * auto_contributors[] { "ni1l", "nicelulu", "nickzhwang", + "nikitakeba", "nikitamikhaylov", "nonexistence", "ns-vasilev", @@ -1455,6 +1515,7 @@ const char * auto_contributors[] { "pawelsz-rb", "pdai", "pdv-ru", + "pedro.riera", "pengxiangcai", "peshkurov", "peter279k", @@ -1504,6 +1565,7 @@ const char * auto_contributors[] { "satanson", "save-my-heart", "sdk2", + "selfuppen", "serebrserg", "serxa", "sev7e0", @@ -1548,8 +1610,10 @@ const char * auto_contributors[] { "teng.ma", "terrylin", "tesw yew isal", + "therealnick233", "tianzhou", "tiger.yan", + "timfursov", "tison", "topvisor", "tpanetti", @@ -1563,6 +1627,7 @@ const char * auto_contributors[] { "usurai", "vahid-sohrabloo", "vdimir", + "velavokr", "velom", "vesslanjin", "vgocoder", @@ -1587,17 +1652,23 @@ const char * auto_contributors[] { "wuxiaobai24", "wzl", "xPoSx", + "xbthink", + "xiao", + "xiaolei565", + "xiebin", "xiedeyantu", "xieyichen", "xinhuitian", "xlwh", "xmy", + "xuelei", "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", "yangshuai", "yaqi-zhao", + "yariks5s", "yeer", "ygrek", "yhgcn", @@ -1654,6 +1725,7 @@ const char * auto_contributors[] { "Дмитрий Канатников", "Иванов Евгений", "Илья Исаев", + "Илья Коргун", "Коренберг Марк", "Коренберг ☢️ Марк", "Павел Литвиненко", @@ -1678,6 +1750,7 @@ const char * auto_contributors[] { "李扬", "极客青年", "枢木", + "王智博", "董海镔", "谢磊", "贾顺名(Jarvis)", diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index 67867b6c577..bae7a266dcd 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -290,7 +290,7 @@ void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, Context } else { - throw 
Coordination::Exception(maybe_finished_hosts.error, fs::path(task.entry_path) / "finished"); + throw Coordination::Exception::fromPath(maybe_finished_hosts.error, fs::path(task.entry_path) / "finished"); } /// Process active nodes @@ -322,7 +322,7 @@ void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, Context } else { - throw Coordination::Exception(maybe_active_hosts.error, fs::path(task.entry_path) / "active"); + throw Coordination::Exception::fromPath(maybe_active_hosts.error, fs::path(task.entry_path) / "active"); } /// Process the rest hosts diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2649cf71182..0c4eb197efd 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -185,21 +186,9 @@ Pipe StorageSystemDataSkippingIndices::read( size_t /* num_streams */) { storage_snapshot->check(column_names); - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); MutableColumnPtr column = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2fcc91e49bb..1fa94fab7bf 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -117,13 +117,23 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c const auto & database = databases.at(database_name); - res_columns[0]->insert(database_name); - res_columns[1]->insert(database->getEngineName()); - res_columns[2]->insert(context->getPath() + database->getDataPath()); - res_columns[3]->insert(database->getMetadataPath()); - res_columns[4]->insert(database->getUUID()); - res_columns[5]->insert(getEngineFull(context, database)); - res_columns[6]->insert(database->getDatabaseComment()); + size_t src_index = 0; + size_t res_index = 0; + const auto & columns_mask = query_info.columns_mask; + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getEngineName()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(context->getPath() + database->getDataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getMetadataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getUUID()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(getEngineFull(context, database)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getDatabaseComment()); } } diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 37c5f97d497..29dd786ca0a 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -26,6 +26,8 @@ public: protected: using 
IStorageSystemOneBlock::IStorageSystemOneBlock; + bool supportsColumnsMask() const override { return true; } + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 97af4094e42..a9cd5f2610a 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -2,10 +2,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -81,13 +83,11 @@ struct WorkerState class DetachedPartsSource : public ISource { public: - DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_, - bool has_bytes_on_disk_column_) + DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_) : ISource(std::move(header_)) , state(state_) , columns_mask(std::move(columns_mask_)) , block_size(block_size_) - , has_bytes_on_disk_column(has_bytes_on_disk_column_) {} String getName() const override { return "DataPartsSource"; } @@ -127,7 +127,6 @@ private: std::shared_ptr state; const std::vector columns_mask; const UInt64 block_size; - const bool has_bytes_on_disk_column; const size_t support_threads = 35; StoragesInfo current_info; @@ -149,9 +148,6 @@ private: void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) { - if (!has_bytes_on_disk_column) - return; - WorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) @@ -211,7 +207,9 @@ private: auto begin = detached_parts.size() - rows; std::vector> parts_sizes(rows); - calculatePartSizeOnDisk(begin, parts_sizes); + constexpr size_t bytes_on_disk_col_idx = 4; + if (columns_mask[bytes_on_disk_col_idx]) + calculatePartSizeOnDisk(begin, parts_sizes); for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { @@ -229,10 +227,23 @@ private: new_columns[res_index++]->insert(p.dir_name); if (columns_mask[src_index++]) { - chassert(has_bytes_on_disk_column); + chassert(src_index - 1 == bytes_on_disk_col_idx); size_t bytes_on_disk = parts_sizes.at(p_id - begin).load(); new_columns[res_index++]->insert(bytes_on_disk); } + if (columns_mask[src_index++]) + { + Poco::Timestamp modification_time{}; + try + { + modification_time = p.disk->getLastModified(fs::path(current_info.data->getRelativeDataPath()) / MergeTreeData::DETACHED_DIR_NAME / p.dir_name); + } + catch (const fs::filesystem_error &) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + new_columns[res_index++]->insert(static_cast(modification_time.epochTime())); + } if (columns_mask[src_index++]) new_columns[res_index++]->insert(p.disk->getName()); if (columns_mask[src_index++]) @@ -263,12 +274,13 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i {"partition_id", std::make_shared(std::make_shared())}, {"name", std::make_shared()}, {"bytes_on_disk", std::make_shared()}, + {"modification_time",std::make_shared()}, {"disk", std::make_shared()}, {"path", std::make_shared()}, {"reason", std::make_shared(std::make_shared())}, {"min_block_number", std::make_shared(std::make_shared())}, {"max_block_number", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared())} + {"level", std::make_shared(std::make_shared())}, }}); setInMemoryMetadata(storage_metadata); } @@ -285,21 +297,7 @@ Pipe StorageSystemDetachedParts::read( 
storage_snapshot->check(column_names); Block sample_block = storage_snapshot->metadata->getSampleBlock(); - NameSet names_set(column_names.begin(), column_names.end()); - - Block header; - std::vector columns_mask(sample_block.columns()); - - for (size_t i = 0; i < columns_mask.size(); ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } - - bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk"); + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto state = std::make_shared(StoragesInfoStream(query_info, context)); @@ -307,7 +305,7 @@ Pipe StorageSystemDetachedParts::read( for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column); + auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size); pipe.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemEvents.cpp b/src/Storages/System/StorageSystemEvents.cpp index b9b07cfe0ac..a914c60abf4 100644 --- a/src/Storages/System/StorageSystemEvents.cpp +++ b/src/Storages/System/StorageSystemEvents.cpp @@ -16,6 +16,13 @@ NamesAndTypesList StorageSystemEvents::getNamesAndTypes() }; } +NamesAndAliases StorageSystemEvents::getNamesAndAliases() +{ + return { + {"name", std::make_shared(), "event"} + }; +} + void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index ea0322c13b8..b2e4bac072a 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -17,6 +17,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index e03fd9ca081..b6126f2e032 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -1,7 +1,7 @@ #include "StorageSystemFilesystemCache.h" #include #include -#include +#include #include #include #include @@ -28,6 +28,7 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() {"downloaded_size", std::make_shared()}, {"kind", std::make_shared()}, {"unbound", std::make_shared>()}, + {"file_size", std::make_shared(std::make_shared())}, }; } @@ -43,9 +44,8 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & [cache_name, cache_data] : caches) { const auto & cache = cache_data->cache; - auto file_segments = cache->getSnapshot(); - - for (const auto & file_segment : *file_segments) + const auto file_segments = cache->getSnapshot(); + for (const auto & file_segment : file_segments) { size_t i = 0; res_columns[i++]->insert(cache_name); @@ -53,7 +53,8 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex /// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference /// (because file_segments in getSnapshot doesn't have `cache` field set) - res_columns[i++]->insert(cache->getPathInLocalCache(file_segment->key(), 
file_segment->offset(), file_segment->getKind())); + const auto path = cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind()); + res_columns[i++]->insert(path); res_columns[i++]->insert(file_segment->key().toString()); const auto & range = file_segment->range(); @@ -63,9 +64,16 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[i++]->insert(FileSegment::stateToString(file_segment->state())); res_columns[i++]->insert(file_segment->getHitsCount()); res_columns[i++]->insert(file_segment->getRefCount()); - res_columns[i++]->insert(file_segment->getDownloadedSize(false)); + res_columns[i++]->insert(file_segment->getDownloadedSize()); res_columns[i++]->insert(toString(file_segment->getKind())); res_columns[i++]->insert(file_segment->isUnbound()); + + std::error_code ec; + auto size = fs::file_size(path, ec); + if (!ec) + res_columns[i++]->insert(size); + else + res_columns[i++]->insertDefault(); } } } diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index da93adc8d55..cc5c8d12f79 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp new file mode 100644 index 00000000000..eb7d84603c0 --- /dev/null +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -0,0 +1,175 @@ +#include "config.h" + +#if USE_RDKAFKA + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "base/types.h" + +namespace DB +{ + +NamesAndTypesList StorageSystemKafkaConsumers::getNamesAndTypes() +{ + NamesAndTypesList names_and_types{ + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"consumer_id", std::make_shared()}, //(number? or string? - single clickhouse table can have many consumers) + {"assignments.topic", std::make_shared(std::make_shared())}, + {"assignments.partition_id", std::make_shared(std::make_shared())}, + {"assignments.current_offset", std::make_shared(std::make_shared())}, + {"exceptions.time", std::make_shared(std::make_shared())}, + {"exceptions.text", std::make_shared(std::make_shared())}, + {"last_poll_time", std::make_shared()}, + {"num_messages_read", std::make_shared()}, + {"last_commit_time", std::make_shared()}, + {"num_commits", std::make_shared()}, + {"last_rebalance_time", std::make_shared()}, + {"num_rebalance_revocations", std::make_shared()}, + {"num_rebalance_assignments", std::make_shared()}, + {"is_currently_used", std::make_shared()}, + {"rdkafka_stat", std::make_shared()}, + }; + return names_and_types; +} + +void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); + + size_t index = 0; + + + auto & database = assert_cast(*res_columns[index++]); + auto & table = assert_cast(*res_columns[index++]); + auto & consumer_id = assert_cast(*res_columns[index++]); //(number? or string? 
- single clickhouse table can have many consumers) + + auto & assigments_topics = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & assigments_topics_offsets = assert_cast(*res_columns[index++]).getOffsets(); + + auto & assigments_partition_id = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & assigments_partition_id_offsets = assert_cast(*res_columns[index++]).getOffsets(); + + auto & assigments_current_offset = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & assigments_current_offset_offsets = assert_cast(*res_columns[index++]).getOffsets(); + + auto & exceptions_time = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & exceptions_time_offset = assert_cast(*res_columns[index++]).getOffsets(); + auto & exceptions_text = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & exceptions_text_offset = assert_cast(*res_columns[index++]).getOffsets(); + auto & last_poll_time = assert_cast(*res_columns[index++]); + auto & num_messages_read = assert_cast(*res_columns[index++]); + auto & last_commit_time = assert_cast(*res_columns[index++]); + auto & num_commits = assert_cast(*res_columns[index++]); + auto & last_rebalance_time = assert_cast(*res_columns[index++]); + auto & num_rebalance_revocations = assert_cast(*res_columns[index++]); + auto & num_rebalance_assigments = assert_cast(*res_columns[index++]); + auto & is_currently_used = assert_cast(*res_columns[index++]); + auto & rdkafka_stat = assert_cast(*res_columns[index++]); + + const auto access = context->getAccess(); + size_t last_assignment_num = 0; + size_t exceptions_num = 0; + + auto add_row = [&](const DatabaseTablesIteratorPtr & it, StorageKafka * storage_kafka_ptr) + { + if (!access->isGranted(AccessType::SHOW_TABLES, it->databaseName(), it->name())) + { + return; + } + + std::string database_str = it->databaseName(); + std::string table_str = it->name(); + + auto safe_consumers = storage_kafka_ptr->getSafeConsumers(); + + for (const auto & weak_consumer : safe_consumers.consumers) + { + if (auto consumer = weak_consumer.lock()) + { + auto consumer_stat = consumer->getStat(); + + database.insertData(database_str.data(), database_str.size()); + table.insertData(table_str.data(), table_str.size()); + + consumer_id.insertData(consumer_stat.consumer_id.data(), consumer_stat.consumer_id.size()); + + const auto num_assignnemts = consumer_stat.assignments.size(); + + for (size_t num = 0; num < num_assignnemts; ++num) + { + const auto & assign = consumer_stat.assignments[num]; + + assigments_topics.insertData(assign.topic_str.data(), assign.topic_str.size()); + + assigments_partition_id.insert(assign.partition_id); + assigments_current_offset.insert(assign.current_offset); + } + last_assignment_num += num_assignnemts; + + assigments_topics_offsets.push_back(last_assignment_num); + assigments_partition_id_offsets.push_back(last_assignment_num); + assigments_current_offset_offsets.push_back(last_assignment_num); + + for (const auto & exc : consumer_stat.exceptions_buffer) + { + exceptions_text.insertData(exc.text.data(), exc.text.size()); + exceptions_time.insert(exc.timestamp_usec); + } + exceptions_num += consumer_stat.exceptions_buffer.size(); + exceptions_text_offset.push_back(exceptions_num); + exceptions_time_offset.push_back(exceptions_num); + + + last_poll_time.insert(consumer_stat.last_poll_time); + num_messages_read.insert(consumer_stat.num_messages_read); + last_commit_time.insert(consumer_stat.last_commit_timestamp_usec); + 
num_commits.insert(consumer_stat.num_commits); + last_rebalance_time.insert(consumer_stat.last_rebalance_timestamp_usec); + + num_rebalance_revocations.insert(consumer_stat.num_rebalance_revocations); + num_rebalance_assigments.insert(consumer_stat.num_rebalance_assignments); + + is_currently_used.insert(consumer_stat.in_use); + + rdkafka_stat.insertData(consumer_stat.rdkafka_stat.data(), consumer_stat.rdkafka_stat.size()); + } + } + }; + + const bool show_tables_granted = access->isGranted(AccessType::SHOW_TABLES); + + if (show_tables_granted) + { + auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & db : databases) + { + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) + { + StoragePtr storage = iterator->table(); + if (auto * kafka_table = dynamic_cast(storage.get())) + { + add_row(iterator, kafka_table); + } + } + } + + } +} + +} + +#endif diff --git a/src/Storages/System/StorageSystemKafkaConsumers.h b/src/Storages/System/StorageSystemKafkaConsumers.h new file mode 100644 index 00000000000..eda3a39bc7e --- /dev/null +++ b/src/Storages/System/StorageSystemKafkaConsumers.h @@ -0,0 +1,27 @@ +#pragma once + +#include "config.h" + +#if USE_RDKAFKA + + +#include + + +namespace DB +{ + +class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemKafkaConsumers"; } + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; +}; + +} + +#endif diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp b/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp deleted file mode 100644 index 3bb92814a2f..00000000000 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include - -#if USE_ROCKSDB -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -NamesAndTypesList StorageSystemMergeTreeMetadataCache::getNamesAndTypes() -{ - return { - {"key", std::make_shared()}, - {"value", std::make_shared()}, - }; -} - -static bool extractKeyImpl(const IAST & elem, String & res, bool & precise) -{ - const auto * function = elem.as(); - if (!function) - return false; - - if (function->name == "and") - { - for (const auto & child : function->arguments->children) - { - bool tmp_precise = false; - if (extractKeyImpl(*child, res, tmp_precise)) - { - precise = tmp_precise; - return true; - } - } - return false; - } - - if (function->name == "equals" || function->name == "like") - { - const auto & args = function->arguments->as(); - const IAST * value; - - if (args.children.size() != 2) - return false; - - const ASTIdentifier * ident; - if ((ident = args.children.at(0)->as())) - value = args.children.at(1).get(); - else if ((ident = args.children.at(1)->as())) - value = args.children.at(0).get(); - else - return false; - - if (ident->name() != "key") - return false; - - const auto * literal = value->as(); - if (!literal) - return false; - - if (literal->value.getType() != Field::Types::String) - return false; - - res = literal->value.safeGet(); - precise = function->name == "equals"; - return true; - } - return false; -} - - -/// Retrieve from the query a condition of the form `key= 
'key'`, from conjunctions in the WHERE clause. -static String extractKey(const ASTPtr & query, bool& precise) -{ - const auto & select = query->as(); - if (!select.where()) - return ""; - - String res; - return extractKeyImpl(*select.where(), res, precise) ? res : ""; -} - - -void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const -{ - bool precise = false; - String key = extractKey(query_info.query, precise); - if (key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " - "or key LIKE 'prefix%' in WHERE clause."); - - auto cache = context->getMergeTreeMetadataCache(); - if (precise) - { - String value; - if (cache->get(key, value) != MergeTreeMetadataCache::Status::OK()) - return; - - size_t col_num = 0; - res_columns[col_num++]->insert(key); - res_columns[col_num++]->insert(value); - } - else - { - String target = extractFixedPrefixFromLikePattern(key, /*requires_perfect_prefix*/ false); - if (target.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " - "or key LIKE 'prefix%' in WHERE clause."); - - Strings keys; - Strings values; - keys.reserve(4096); - values.reserve(4096); - cache->getByPrefix(target, keys, values); - if (keys.empty()) - return; - - assert(keys.size() == values.size()); - for (size_t i = 0; i < keys.size(); ++i) - { - size_t col_num = 0; - res_columns[col_num++]->insert(keys[i]); - res_columns[col_num++]->insert(values[i]); - } - } -} - -} -#endif diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.h b/src/Storages/System/StorageSystemMergeTreeMetadataCache.h deleted file mode 100644 index 4603583227e..00000000000 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_ROCKSDB -#include - - -namespace DB -{ -class Context; - - -/// Implements `merge_tree_metadata_cache` system table, which allows you to view the metadata cache data in rocksdb for testing purposes. 
-class StorageSystemMergeTreeMetadataCache : public IStorageSystemOneBlock -{ -public: - std::string getName() const override { return "SystemMergeTreeMetadataCache"; } - - static NamesAndTypesList getNamesAndTypes(); - -protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; -}; - -} -#endif diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 1f32a0ff700..2dbe2964eb9 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -20,6 +20,7 @@ NamesAndTypesList StorageSystemMerges::getNamesAndTypes() {"source_part_paths", std::make_shared(std::make_shared())}, {"result_part_path", std::make_shared()}, {"partition_id", std::make_shared()}, + {"partition", std::make_shared()}, {"is_mutation", std::make_shared()}, {"total_size_bytes_compressed", std::make_shared()}, {"total_size_bytes_uncompressed", std::make_shared()}, @@ -58,6 +59,7 @@ void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr cont res_columns[i++]->insert(merge.source_part_paths); res_columns[i++]->insert(merge.result_part_path); res_columns[i++]->insert(merge.partition_id); + res_columns[i++]->insert(merge.partition); res_columns[i++]->insert(merge.is_mutation); res_columns[i++]->insert(merge.total_size_bytes_compressed); res_columns[i++]->insert(merge.total_size_bytes_uncompressed); diff --git a/src/Storages/System/StorageSystemMetrics.cpp b/src/Storages/System/StorageSystemMetrics.cpp index 6007c8a7c71..ced363fed17 100644 --- a/src/Storages/System/StorageSystemMetrics.cpp +++ b/src/Storages/System/StorageSystemMetrics.cpp @@ -17,6 +17,13 @@ NamesAndTypesList StorageSystemMetrics::getNamesAndTypes() }; } +NamesAndAliases StorageSystemMetrics::getNamesAndAliases() +{ + return { + {"name", std::make_shared(), "metric"} + }; +} + void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index def95e0a934..e3e2c07014f 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -18,6 +18,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 0979b9d9371..513af6cfc46 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -254,21 +255,10 @@ Pipe StorageSystemPartsBase::read( StoragesInfoStream stream(query_info, context); /// Create the result. 
- - NameSet names_set(column_names.begin(), column_names.end()); - Block sample = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample.columns()); - for (size_t i = 0; i < sample.columns(); ++i) - { - if (names_set.contains(sample.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); + MutableColumns res_columns = header.cloneEmptyColumns(); if (has_state_column) res_columns.push_back(ColumnString::create()); diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 288e4fd52a0..03757101ddf 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -50,7 +50,7 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr res_columns[3]->insert(key.is_shared); res_columns[4]->insert(key.is_compressed); res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); - res_columns[6]->insert(key.ast->getTreeHash().first); + res_columns[6]->insert(key.ast->getTreeHash().low64); } } diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index cf5a24f88cd..8fcd0fd7cf5 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ NamesAndTypesList StorageSystemRoleGrants::getNamesAndTypes() {"user_name", std::make_shared(std::make_shared())}, {"role_name", std::make_shared(std::make_shared())}, {"granted_role_name", std::make_shared()}, + {"granted_role_id", std::make_shared()}, {"granted_role_is_default", std::make_shared()}, {"with_admin_option", std::make_shared()}, }; @@ -45,12 +47,14 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr auto & column_role_name = assert_cast(assert_cast(*res_columns[column_index]).getNestedColumn()); auto & column_role_name_null_map = assert_cast(*res_columns[column_index++]).getNullMapData(); auto & column_granted_role_name = assert_cast(*res_columns[column_index++]); + auto & column_granted_role_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_is_default = assert_cast(*res_columns[column_index++]).getData(); auto & column_admin_option = assert_cast(*res_columns[column_index++]).getData(); auto add_row = [&](const String & grantee_name, AccessEntityType grantee_type, const String & granted_role_name, + const UUID & granted_role_id, bool is_default, bool with_admin_option) { @@ -72,6 +76,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr assert(false); column_granted_role_name.insertData(granted_role_name.data(), granted_role_name.length()); + column_granted_role_id.push_back(granted_role_id.toUnderType()); column_is_default.push_back(is_default); column_admin_option.push_back(with_admin_option); }; @@ -90,7 +95,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr continue; bool is_default = !default_roles || default_roles->match(role_id); - add_row(grantee_name, grantee_type, *role_name, is_default, element.admin_option); + add_row(grantee_name, grantee_type, *role_name, role_id, is_default, element.admin_option); } } }; diff --git a/src/Storages/System/StorageSystemScheduler.cpp 
b/src/Storages/System/StorageSystemScheduler.cpp new file mode 100644 index 00000000000..9f4307fca3a --- /dev/null +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "IO/ResourceRequest.h" + + +namespace DB +{ + +NamesAndTypesList StorageSystemScheduler::getNamesAndTypes() +{ + NamesAndTypesList names_and_types{ + {"resource", std::make_shared()}, + {"path", std::make_shared()}, + {"type", std::make_shared()}, + {"weight", std::make_shared()}, + {"priority", std::make_shared()}, + {"is_active", std::make_shared()}, + {"active_children", std::make_shared()}, + {"dequeued_requests", std::make_shared()}, + {"dequeued_cost", std::make_shared()}, + {"busy_periods", std::make_shared()}, + {"vruntime", std::make_shared(std::make_shared())}, + {"system_vruntime", std::make_shared(std::make_shared())}, + {"queue_length", std::make_shared(std::make_shared())}, + {"queue_cost", std::make_shared(std::make_shared())}, + {"budget", std::make_shared(std::make_shared())}, + {"is_satisfied", std::make_shared(std::make_shared())}, + {"inflight_requests", std::make_shared(std::make_shared())}, + {"inflight_cost", std::make_shared(std::make_shared())}, + {"max_requests", std::make_shared(std::make_shared())}, + {"max_cost", std::make_shared(std::make_shared())}, + }; + return names_and_types; +} + + +void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + context->getResourceManager()->forEachNode([&] (const String & resource, const String & path, const String & type, const SchedulerNodePtr & node) + { + size_t i = 0; + res_columns[i++]->insert(resource); + res_columns[i++]->insert(path); + res_columns[i++]->insert(type); + res_columns[i++]->insert(node->info.weight); + res_columns[i++]->insert(node->info.priority.value); + res_columns[i++]->insert(node->isActive()); + res_columns[i++]->insert(node->activeChildren()); + res_columns[i++]->insert(node->dequeued_requests.load()); + res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->busy_periods.load()); + + Field vruntime; + Field system_vruntime; + Field queue_length; + Field queue_cost; + Field budget; + Field is_satisfied; + Field inflight_requests; + Field inflight_cost; + Field max_requests; + Field max_cost; + + if (auto * parent = dynamic_cast(node->parent)) + { + if (auto value = parent->getChildVRuntime(node.get())) + vruntime = *value; + } + if (auto * ptr = dynamic_cast(node.get())) + system_vruntime = ptr->getSystemVRuntime(); + if (auto * ptr = dynamic_cast(node.get())) + std::tie(queue_length, queue_cost) = ptr->getQueueLengthAndCost(); + if (auto * ptr = dynamic_cast(node.get())) + budget = ptr->getBudget(); + if (auto * ptr = dynamic_cast(node.get())) + is_satisfied = ptr->isSatisfied(); + if (auto * ptr = dynamic_cast(node.get())) + { + std::tie(inflight_requests, inflight_cost) = ptr->getInflights(); + std::tie(max_requests, max_cost) = ptr->getLimits(); + } + + res_columns[i++]->insert(vruntime); + res_columns[i++]->insert(system_vruntime); + res_columns[i++]->insert(queue_length); + res_columns[i++]->insert(queue_cost); + res_columns[i++]->insert(budget); + res_columns[i++]->insert(is_satisfied); + res_columns[i++]->insert(inflight_requests); + res_columns[i++]->insert(inflight_cost); + res_columns[i++]->insert(max_requests); + res_columns[i++]->insert(max_cost); + }); +} + +} diff 
--git a/src/Storages/System/StorageSystemScheduler.h b/src/Storages/System/StorageSystemScheduler.h new file mode 100644 index 00000000000..31d14862209 --- /dev/null +++ b/src/Storages/System/StorageSystemScheduler.h @@ -0,0 +1,22 @@ +#pragma once + +#include + + +namespace DB +{ +class Context; + +/// Implements `system.scheduler` table, which allows you to get information about scheduling nodes. +class StorageSystemScheduler final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemScheduler"; } + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index b11fc137d8c..a19cb1442c9 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -3,8 +3,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -36,7 +39,8 @@ NamesAndTypesList StorageSystemSchemaInferenceCache::getNamesAndTypes() {"format", std::make_shared()}, {"additional_format_info", std::make_shared()}, {"registration_time", std::make_shared()}, - {"schema", std::make_shared()} + {"schema", std::make_shared(std::make_shared())}, + {"number_of_rows", std::make_shared(std::make_shared())} }; } @@ -52,7 +56,14 @@ static void fillDataImpl(MutableColumns & res_columns, SchemaCache & schema_cach res_columns[2]->insert(key.format); res_columns[3]->insert(key.additional_format_info); res_columns[4]->insert(schema_info.registration_time); - res_columns[5]->insert(getSchemaString(schema_info.columns)); + if (schema_info.columns) + res_columns[5]->insert(getSchemaString(*schema_info.columns)); + else + res_columns[5]->insertDefault(); + if (schema_info.num_rows) + res_columns[6]->insert(*schema_info.num_rows); + else + res_columns[6]->insertDefault(); } } @@ -66,6 +77,9 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C fillDataImpl(res_columns, StorageHDFS::getSchemaCache(context), "HDFS"); #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); +#if USE_AZURE_BLOB_STORAGE + fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); +#endif } } diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.h b/src/Storages/System/StorageSystemSchemaInferenceCache.h index 357bd687da6..e3afc6e1e38 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.h +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 887d7f2a5d4..8d703632c68 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -275,15 +275,6 @@ Pipe StorageSystemStackTrace::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - } - } - bool send_signal = names_set.contains("trace") || names_set.contains("query_id"); bool 
read_thread_names = names_set.contains("thread_name"); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e00d2d95568..715c98ee92a 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,22 @@ static ColumnPtr getFilteredTables(const ASTPtr & query, const ColumnPtr & filte return block.getByPosition(0).column; } +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +static bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + class TablesBlockSource : public ISource { @@ -265,6 +282,8 @@ protected: if (!tables_it || !tables_it->isValid()) tables_it = database->getTablesIterator(context); + const bool need_table = needTable(database, getPort().getHeader()); + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) { auto table_name = tables_it->name(); @@ -274,23 +293,27 @@ protected: if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - StoragePtr table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - + StoragePtr table = nullptr; TableLockHolder lock; - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) + if (need_table) { - lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) continue; - } + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), + context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } ++rows_count; size_t src_index = 0; @@ -307,6 +330,7 @@ protected: if (columns_mask[src_index++]) { + chassert(table != nullptr); res_columns[res_index++]->insert(table->getName()); } @@ -396,7 +420,9 @@ protected: else src_index += 3; - StorageMetadataPtr metadata_snapshot = table->getInMemoryMetadataPtr(); + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); ASTPtr expression_ptr; if (columns_mask[src_index++]) @@ -433,7 +459,7 @@ protected: if (columns_mask[src_index++]) { - auto policy = table->getStoragePolicy(); + auto policy = 
table ? table->getStoragePolicy() : nullptr; if (policy) res_columns[res_index++]->insert(policy->getName()); else @@ -444,7 +470,7 @@ protected: settings.select_sequential_consistency = 0; if (columns_mask[src_index++]) { - auto total_rows = table->totalRows(settings); + auto total_rows = table ? table->totalRows(settings) : std::nullopt; if (total_rows) res_columns[res_index++]->insert(*total_rows); else @@ -489,7 +515,7 @@ protected: if (columns_mask[src_index++]) { - auto lifetime_rows = table->lifetimeRows(); + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; if (lifetime_rows) res_columns[res_index++]->insert(*lifetime_rows); else @@ -498,7 +524,7 @@ protected: if (columns_mask[src_index++]) { - auto lifetime_bytes = table->lifetimeBytes(); + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; if (lifetime_bytes) res_columns[res_index++]->insert(*lifetime_bytes); else @@ -587,23 +613,9 @@ Pipe StorageSystemTables::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block res_block; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - res_block.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); ColumnPtr filtered_databases_column = getFilteredDatabases(query_info, context); ColumnPtr filtered_tables_column = getFilteredTables(query_info.query, filtered_databases_column, context); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 4d7f59b8ccd..bd7c81df107 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -490,7 +490,8 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) continue; auto & task = list_tasks[list_task_idx]; - context->getProcessListElement()->checkTimeLimit(); + if (auto elem = context->getProcessListElement()) + elem->checkTimeLimit(); Strings nodes = std::move(list_result.names); @@ -525,7 +526,8 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) auto & get_task = get_tasks[i]; auto & list_task = list_tasks[get_task.list_task_idx]; - context->getProcessListElement()->checkTimeLimit(); + if (auto elem = context->getProcessListElement()) + elem->checkTimeLimit(); // Deduplication String key = list_task.path_part + '/' + get_task.node; diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 61a91685324..074a648d235 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -3,14 +3,23 @@ #include #include #include -#include +#include + +#include "config.h" + +/// Embedded SQL definitions +INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql"); +INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql"); +INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql"); +INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql"); + 
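For context on the switch from getResource() to INCBIN above: INCBIN embeds a file into the binary at compile time and exposes it through generated symbols, which the code below wraps into std::string_view. The following is a minimal standalone sketch of that mechanism, not part of the patch; the file name "example.sql" and the helper getEmbeddedExampleSql() are hypothetical, and the symbol naming (g<name>Data / g<name>Size) is assumed from the gresource_*_sqlData / gresource_*_sqlSize usages later in attachInformationSchema.

// Hypothetical sketch of INCBIN usage, assuming the incbin.h single-header library.
// INCBIN(name, path) embeds the file at compile time and generates the symbols
// g<name>Data (const unsigned char[]) and g<name>Size (byte count).
#include <string_view>
#include <incbin.h>

INCBIN(example_sql, "example.sql");  // "example.sql" must exist at compile time

static std::string_view getEmbeddedExampleSql()
{
    // Wrap the generated symbols into a string_view, mirroring how the embedded
    // schemata/tables/views/columns SQL is passed to createInformationSchemaView below.
    return {reinterpret_cast<const char *>(gexample_sqlData), gexample_sqlSize};
}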
namespace DB { /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt -static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) +static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query) { try { @@ -18,15 +27,13 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); if (database.getEngineName() != "Memory") return; - bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; - auto attach_query = getResource(metadata_resource_name); - if (attach_query.empty()) + if (query.empty()) return; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -34,13 +41,18 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d assert(view_name == ast_create.getTable()); ast_create.attach = false; ast_create.setDatabase(database.getDatabaseName()); - if (is_uppercase) - ast_create.setTable(Poco::toUpper(view_name)); StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), database.getTableDataPath(ast_create), context, true).second; - database.createTable(context, ast_create.getTable(), view, ast); + ASTPtr ast_upper = ast_create.clone(); + auto & ast_create_upper = ast_upper->as(); + ast_create_upper.setTable(Poco::toUpper(view_name)); + StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), + database.getTableDataPath(ast_create_upper), context, true).second; + + database.createTable(context, ast_create_upper.getTable(), view_upper, ast_upper); + } catch (...) 
{ @@ -50,10 +62,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) { - createInformationSchemaView(context, information_schema_database, "schemata"); - createInformationSchemaView(context, information_schema_database, "tables"); - createInformationSchemaView(context, information_schema_database, "views"); - createInformationSchemaView(context, information_schema_database, "columns"); + createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast(gresource_schemata_sqlData), gresource_schemata_sqlSize)); + createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast(gresource_tables_sqlData), gresource_tables_sqlSize)); + createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast(gresource_views_sqlData), gresource_views_sqlSize)); + createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast(gresource_columns_sqlData), gresource_columns_sqlSize)); } } diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 84965b3196b..e4e19ce2e06 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -83,6 +83,11 @@ #include #include #include +#include + +#if USE_RDKAFKA +#include +#endif #ifdef OS_LINUX #include @@ -90,7 +95,6 @@ #if USE_ROCKSDB #include -#include #endif @@ -145,12 +149,15 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "backups"); attach(context, system_database, "schema_inference_cache"); attach(context, system_database, "dropped_tables"); + attach(context, system_database, "scheduler"); +#if USE_RDKAFKA + attach(context, system_database, "kafka_consumers"); +#endif #ifdef OS_LINUX attach(context, system_database, "stack_trace"); #endif #if USE_ROCKSDB attach(context, system_database, "rocksdb"); - attach(context, system_database, "merge_tree_metadata_cache"); #endif } diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp new file mode 100644 index 00000000000..c29ccb590ed --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names) +{ + std::vector columns_mask(sample_block.columns()); + Block header; + + NameSet names_set(column_names.begin(), column_names.end()); + for (size_t i = 0; i < columns_mask.size(); ++i) + { + if (names_set.contains(sample_block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(sample_block.getByPosition(i)); + } + } + + return std::make_pair(columns_mask, header); +} + +} diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.h b/src/Storages/System/getQueriedColumnsMaskAndHeader.h new file mode 100644 index 00000000000..0781a92fa60 --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.h @@ -0,0 +1,11 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names); + +} diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 
907fc0cd22c..dbb424ee957 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -20,6 +20,10 @@ #include #include +#include +#include +#include + #include #include #include @@ -30,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -63,14 +68,31 @@ bool isValidFunction(const ASTPtr & expression, const std::function & is_constant, ASTs & result) { const auto * function = expression->as(); - if (function && (function->name == "and" || function->name == "indexHint")) + + if (function) { - bool ret = true; - for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, result); - return ret; + if (function->name == "and" || function->name == "indexHint") + { + bool ret = true; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, result); + return ret; + } + else if (function->name == "or") + { + bool ret = true; + ASTs or_args; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, or_args); + /// We can keep condition only if it still OR condition (i.e. we + /// have dependent conditions for columns at both sides) + if (or_args.size() == 2) + result.push_back(makeASTForLogicalOr(std::move(or_args))); + return ret; + } } - else if (isValidFunction(expression, is_constant)) + + if (isValidFunction(expression, is_constant)) { result.push_back(expression->clone()); return true; @@ -80,13 +102,13 @@ bool extractFunctions(const ASTPtr & expression, const std::functiongetNodes()) + { + if (node.type == ActionsDAG::ActionType::COLUMN) + { + const ColumnSet * column_set = checkAndGetColumnConstData(node.column.get()); + if (!column_set) + column_set = checkAndGetColumn(node.column.get()); + + if (column_set) + { + auto future_set = column_set->getData(); + if (!future_set->get()) + { + if (auto * set_from_subquery = typeid_cast(future_set.get())) + { + auto plan = set_from_subquery->build(context); + + if (!plan) + continue; + + auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); + pipeline.complete(std::make_shared(Block())); + + CompletedPipelineExecutor executor(pipeline); + executor.execute(); + } + } + } + } + } +} + +void filterBlockWithQuery(ActionsDAGPtr dag, Block & block, ContextPtr context) +{ + auto actions = std::make_shared(dag); + makeSets(actions, context); + Block block_with_filter = block; + actions->execute(block_with_filter); + + /// Filter the block. 
+ String filter_column_name = dag->getOutputs().at(0)->result_name; + ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst(); + + ConstantFilterDescription constant_filter(*filter_column); + + if (constant_filter.always_true) + { + return; + } + + if (constant_filter.always_false) + { + block = block.cloneEmpty(); + return; + } + + FilterDescription filter(*filter_column); + + for (size_t i = 0; i < block.columns(); ++i) + { + ColumnPtr & column = block.safeGetByPosition(i).column; + column = column->filter(*filter.data, -1); + } +} + void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast) { if (block.rows() == 0) @@ -191,33 +281,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); ExpressionActionsPtr actions = analyzer.getActions(false /* add alises */, true /* project result */, CompileExpressions::yes); - for (const auto & node : actions->getNodes()) - { - if (node.type == ActionsDAG::ActionType::COLUMN) - { - const ColumnSet * column_set = checkAndGetColumnConstData(node.column.get()); - if (!column_set) - column_set = checkAndGetColumn(node.column.get()); - - if (column_set) - { - auto future_set = column_set->getData(); - if (!future_set->get()) - { - if (auto * set_from_subquery = typeid_cast(future_set.get())) - { - auto plan = set_from_subquery->build(context); - auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); - pipeline.complete(std::make_shared(Block())); - - CompletedPipelineExecutor executor(pipeline); - executor.execute(); - } - } - } - } - } + makeSets(actions, context); Block block_with_filter = block; actions->execute(block_with_filter); @@ -248,6 +312,101 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex } } +NamesAndTypesList getPathAndFileVirtualsForStorage(NamesAndTypesList storage_columns) +{ + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + default_virtuals.sort(); + storage_columns.sort(); + + NamesAndTypesList result_virtuals; + std::set_difference( + default_virtuals.begin(), default_virtuals.end(), storage_columns.begin(), storage_columns.end(), + std::back_inserter(result_virtuals), + [](const NameAndTypePair & lhs, const NameAndTypePair & rhs){ return lhs.name < rhs.name; }); + + return result_virtuals; +} + +static void addPathAndFileToVirtualColumns(Block & block, const String & path, size_t idx) +{ + if (block.has("_path")) + block.getByName("_path").column->assumeMutableRef().insert(path); + + if (block.has("_file")) + { + auto pos = path.find_last_of('/'); + String file; + if (pos != std::string::npos) + file = path.substr(pos + 1); + else + file = path; + + block.getByName("_file").column->assumeMutableRef().insert(file); + } + + block.getByName("_idx").column->assumeMutableRef().insert(idx); +} + +ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context) +{ + if (!query || virtual_columns.empty()) + return {}; + + Block block; + for (const auto & column : virtual_columns) + block.insert({column.type->createColumn(), column.type, column.name}); + /// 
Create a block with one row to construct filter + /// Append "idx" column as the filter result + block.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); + addPathAndFileToVirtualColumns(block, path_example, 0); + ASTPtr filter_ast; + prepareFilterBlockWithQuery(query, context, block, filter_ast); + return filter_ast; +} + +ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast) +{ + Block block; + for (const auto & column : virtual_columns) + block.insert({column.type->createColumn(), column.type, column.name}); + block.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); + + for (size_t i = 0; i != paths.size(); ++i) + addPathAndFileToVirtualColumns(block, paths[i], i); + + filterBlockWithQuery(query, block, context, filter_ast); + + return block.getByName("_idx").column; +} + +void addRequestedPathAndFileVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, const String * filename) +{ + for (const auto & virtual_column : requested_virtual_columns) + { + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), path)); + } + else if (virtual_column.name == "_file") + { + if (filename) + { + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *filename)); + } + else + { + size_t last_slash_pos = path.find_last_of('/'); + auto filename_from_path = path.substr(last_slash_pos + 1); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), filename_from_path)); + } + } + } +} + } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 15783f6e79f..a21f2b05552 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -33,6 +33,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block /// Only elements of the outer conjunction are considered, depending only on the columns present in the block. /// If `expression_ast` is passed, use it to filter block. 
void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast = {}); +void filterBlockWithQuery(ActionsDAGPtr dag, Block & block, ContextPtr context); /// Extract from the input stream a set of `name` column values template @@ -46,6 +47,29 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) return res; } +NamesAndTypesList getPathAndFileVirtualsForStorage(NamesAndTypesList storage_columns); + +ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context); + +ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast); + +template +void filterByPathOrFile(std::vector & sources, const std::vector & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast) +{ + auto indexes_column = getFilterByPathAndFileIndexes(paths, query, virtual_columns, context, filter_ast); + const auto & indexes = typeid_cast(*indexes_column).getData(); + if (indexes.size() == sources.size()) + return; + + std::vector filtered_sources; + filtered_sources.reserve(indexes.size()); + for (auto index : indexes) + filtered_sources.emplace_back(std::move(sources[index])); + sources = std::move(filtered_sources); +} + +void addRequestedPathAndFileVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, const String * filename = nullptr); } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0f506040cd9..e3fcd6249d1 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1571,7 +1571,7 @@ void StorageWindowView::writeIntoWindowView( }); auto executor = builder.execute(); - executor->execute(builder.getNumThreads()); + executor->execute(builder.getNumThreads(), local_context->getSettingsRef().use_concurrency_control); } void StorageWindowView::startup() @@ -1599,7 +1599,7 @@ void StorageWindowView::shutdown() DatabaseCatalog::instance().removeViewDependency(select_table_id, table_id); } -void StorageWindowView::checkTableCanBeDropped() const +void StorageWindowView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 847a4945d0e..231616ff820 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -119,7 +119,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void dropInnerTableIfAny(bool sync, ContextPtr context) override; diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 1ee7d747fcc..74f2709f458 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -130,7 +130,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); auto * join_node = 
node->as(); @@ -232,8 +232,8 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, ContextMutablePtr & mutable_context, size_t subquery_depth) { - auto subquery_hash = subquery_node->getTreeHash(); - String temporary_table_name = fmt::format("_data_{}_{}", subquery_hash.first, subquery_hash.second); + const auto subquery_hash = subquery_node->getTreeHash(); + const auto temporary_table_name = fmt::format("_data_{}", toString(subquery_hash)); const auto & external_tables = mutable_context->getExternalTables(); auto external_table_it = external_tables.find(temporary_table_name); diff --git a/src/Storages/fuzzers/CMakeLists.txt b/src/Storages/fuzzers/CMakeLists.txt index 98f490c5984..719b9b77cd9 100644 --- a/src/Storages/fuzzers/CMakeLists.txt +++ b/src/Storages/fuzzers/CMakeLists.txt @@ -1,7 +1,7 @@ clickhouse_add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp) # Look at comment around fuzz_compression target declaration -target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms) clickhouse_add_executable (columns_description_fuzzer columns_description_fuzzer.cpp) -target_link_libraries (columns_description_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (columns_description_fuzzer PRIVATE dbms) diff --git a/src/Storages/fuzzers/columns_description_fuzzer.cpp b/src/Storages/fuzzers/columns_description_fuzzer.cpp index 44fd667ff1c..b703a1e7051 100644 --- a/src/Storages/fuzzers/columns_description_fuzzer.cpp +++ b/src/Storages/fuzzers/columns_description_fuzzer.cpp @@ -2,14 +2,16 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - using namespace DB; - ColumnsDescription columns = ColumnsDescription::parse(std::string(reinterpret_cast(data), size)); - std::cerr << columns.toString() << "\n"; + try + { + using namespace DB; + ColumnsDescription columns = ColumnsDescription::parse(std::string(reinterpret_cast(data), size)); + std::cerr << columns.toString() << "\n"; + } + catch (...) + { + } + return 0; } -catch (...) -{ - return 1; -} diff --git a/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp b/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp index 9a5a68f09cc..e046a73b1f9 100644 --- a/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp +++ b/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -6,19 +5,20 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) -try { - DB::ReadBufferFromMemory in(data, size); - DB::MergeTreeDataPartChecksums res; - DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); + try + { + DB::ReadBufferFromMemory in(data, size); + DB::MergeTreeDataPartChecksums res; + DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); - if (!res.read(in)) - return 1; - res.write(out); + if (!res.read(in)) + return 0; + res.write(out); + } + catch (...) + { + } return 0; } -catch (...) 
-{ - return 1; -} diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index ec8f27feeda..ebd02f424fa 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -38,7 +38,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( if (shard_info.isLocal()) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_func_ptr, context); - return table_function_ptr->getActualTableStructure(context); + return table_function_ptr->getActualTableStructure(context, /*is_insert_query*/ true); } auto table_func_name = queryToString(table_func_ptr); @@ -57,7 +57,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( } ColumnsDescription res; - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id); + auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id); /// Ignore limit for result number of rows (that could be set during handling CSE/CTE), /// since this is a service query and should not lead to query failure. @@ -176,7 +176,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( const auto & shards_info = cluster.getShardsInfo(); auto query = "DESC TABLE " + remote_table_id.getFullTableName(); - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id); + auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id); new_context->setSetting("describe_extend_object_types", true); /// Expect only needed columns from the result of DESC TABLE. diff --git a/src/Storages/getVirtualsForStorage.cpp b/src/Storages/getVirtualsForStorage.cpp deleted file mode 100644 index 93b2aa97856..00000000000 --- a/src/Storages/getVirtualsForStorage.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "getVirtualsForStorage.h" - -namespace DB -{ - -NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_) -{ - auto default_virtuals = default_virtuals_; - auto storage_columns = storage_columns_; - default_virtuals.sort(); - storage_columns.sort(); - - NamesAndTypesList result_virtuals; - std::set_difference( - default_virtuals.begin(), default_virtuals.end(), storage_columns.begin(), storage_columns.end(), - std::back_inserter(result_virtuals), - [](const NameAndTypePair & lhs, const NameAndTypePair & rhs){ return lhs.name < rhs.name; }); - - return result_virtuals; -} - -} diff --git a/src/Storages/getVirtualsForStorage.h b/src/Storages/getVirtualsForStorage.h deleted file mode 100644 index 19e13425959..00000000000 --- a/src/Storages/getVirtualsForStorage.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_); - -} diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp new file mode 100644 index 00000000000..6be4213ec6b --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -0,0 +1,78 @@ +#include +#include + +namespace DB +{ + +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) +{ + ReadFromFormatInfo info; + /// 
Collect requested virtual columns and remove them from requested columns. + Strings columns_to_read; + for (const auto & column_name : requested_columns) + { + bool is_virtual = false; + for (const auto & virtual_column : virtuals) + { + if (column_name == virtual_column.name) + { + info.requested_virtual_columns.push_back(virtual_column); + is_virtual = true; + break; + } + } + + if (!is_virtual) + columns_to_read.push_back(column_name); + } + + /// Create header for Source that will contain all requested columns including virtual columns at the end + /// (because they will be added to the chunk after reading regular columns). + info.source_header = storage_snapshot->getSampleBlockForColumns(columns_to_read); + for (const auto & requested_virtual_column : info.requested_virtual_columns) + info.source_header.insert({requested_virtual_column.type->createColumn(), requested_virtual_column.type, requested_virtual_column.name}); + + /// Set requested columns that should be read from data. + info.requested_columns = storage_snapshot->getColumnsByNames(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(), columns_to_read); + + if (supports_subset_of_columns) + { + /// If only virtual columns were requested, just read the smallest column. + if (columns_to_read.empty()) + { + columns_to_read.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + } + /// We need to replace all subcolumns with their nested columns (e.g `a.b`, `a.b.c`, `x.y` -> `a`, `x`), + /// because most formats cannot extract subcolumns on their own. + /// All requested subcolumns will be extracted after reading. + else + { + std::unordered_set columns_to_read_set; + /// Save original order of columns. + std::vector new_columns_to_read; + for (const auto & column_to_read : info.requested_columns) + { + auto name = column_to_read.getNameInStorage(); + if (!columns_to_read_set.contains(name)) + { + columns_to_read_set.insert(name); + new_columns_to_read.push_back(name); + } + } + columns_to_read = std::move(new_columns_to_read); + } + info.columns_description = storage_snapshot->getDescriptionForColumns(columns_to_read); + } + /// If format doesn't support reading subset of columns, read all columns. + /// Requested columns/subcolumns will be extracted after reading. + else + { + info.columns_description = storage_snapshot->metadata->getColumns(); + } + + /// Create header for InputFormat with columns that will be read from the data. + info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical()); + return info; +} + +} diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h new file mode 100644 index 00000000000..c5f3959a550 --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace DB +{ + struct ReadFromFormatInfo + { + /// Header that will return Source from storage. + /// It contains all requested columns including virtual columns; + Block source_header; + /// Header that will be passed to IInputFormat to read data from file. + /// It can contain more columns than were requested if format doesn't support + /// reading subset of columns. + Block format_header; + /// Description of columns for format_header. Used for inserting defaults. + ColumnsDescription columns_description; + /// The list of requested columns without virtual columns. 
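The column bookkeeping in prepareReadingFromFormat above is easy to lose in the diff, so here is a minimal standalone sketch of the same two steps, with plain STL strings standing in for NamesAndTypesList and getNameInStorage(); the names and the toy dot-splitting are illustrative only, not ClickHouse API:

```cpp
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main()
{
    /// Step 1: split the requested names into virtual columns and columns to read from data,
    /// as prepareReadingFromFormat does for _path/_file.
    const std::vector<std::string> requested = {"a.b", "a.c", "_path", "x"};
    const std::unordered_set<std::string> virtuals = {"_path", "_file"};

    std::vector<std::string> requested_virtuals;
    std::vector<std::string> columns_to_read;
    for (const auto & name : requested)
    {
        if (virtuals.contains(name))
            requested_virtuals.push_back(name);
        else
            columns_to_read.push_back(name);
    }

    /// Step 2: replace subcolumns ("a.b", "a.c") with their storage column ("a"),
    /// deduplicating while keeping the original order, like the new_columns_to_read loop above.
    std::unordered_set<std::string> seen;
    std::vector<std::string> storage_columns;
    for (const auto & name : columns_to_read)
    {
        const auto storage_name = name.substr(0, name.find('.')); /// toy stand-in for getNameInStorage()
        if (seen.insert(storage_name).second)
            storage_columns.push_back(storage_name);
    }

    for (const auto & name : storage_columns)
        std::cout << name << '\n'; /// prints "a" then "x"
}
```

The order-preserving deduplication corresponds to the "Save original order of columns" comment in the hunk above.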
+ NamesAndTypesList requested_columns; + /// The list of requested virtual columns. + NamesAndTypesList requested_virtual_columns; + }; + + /// Get all needed information for reading from data in some input format. + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index a4f1d963704..e5b1c8e8744 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -34,6 +34,8 @@ void registerStorageS3(StorageFactory & factory); void registerStorageCOS(StorageFactory & factory); void registerStorageOSS(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); +void registerStorageS3Queue(StorageFactory & factory); + #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); #endif @@ -133,6 +135,7 @@ void registerStorages() registerStorageCOS(factory); registerStorageOSS(factory); registerStorageHudi(factory); + registerStorageS3Queue(factory); #if USE_PARQUET registerStorageDeltaLake(factory); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 5c1442ece11..749a154c19d 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -127,7 +127,8 @@ static void checkOld( std::string transformed_query = transformQueryForExternalDatabase( query_info, query_info.syntax_analyzer_result->requiredSourceColumns(), - state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); + state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "test", "table", state.context); EXPECT_EQ(transformed_query, expected) << query; } @@ -180,7 +181,8 @@ static void checkNewAnalyzer( query_info.table_expression = findTableExpression(query_node->getJoinTree(), "table"); std::string transformed_query = transformQueryForExternalDatabase( - query_info, column_names, state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); + query_info, column_names, state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "test", "table", state.context); EXPECT_EQ(transformed_query, expected) << query; } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 375510e62bf..84a696a1e9c 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -258,6 +258,7 @@ String transformQueryForExternalDatabaseImpl( Names used_columns, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context) @@ -337,7 +338,8 @@ String transformQueryForExternalDatabaseImpl( IAST::FormatSettings settings( out, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ identifier_quoting_style != IdentifierQuotingStyle::None, - /*identifier_quoting_style*/ identifier_quoting_style); + /*identifier_quoting_style*/ identifier_quoting_style, /*show_secrets_*/ true, + /*literal_escaping_style*/ literal_escaping_style); select->format(settings); @@ -351,6 +353,7 @@ String 
transformQueryForExternalDatabase( const Names & column_names, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context) @@ -375,6 +378,7 @@ String transformQueryForExternalDatabase( column_names, available_columns, identifier_quoting_style, + literal_escaping_style, database, table, context); @@ -386,6 +390,7 @@ String transformQueryForExternalDatabase( query_info.syntax_analyzer_result->requiredSourceColumns(), available_columns, identifier_quoting_style, + literal_escaping_style, database, table, context); diff --git a/src/Storages/transformQueryForExternalDatabase.h b/src/Storages/transformQueryForExternalDatabase.h index 0f2b0a5822f..fb6af21907e 100644 --- a/src/Storages/transformQueryForExternalDatabase.h +++ b/src/Storages/transformQueryForExternalDatabase.h @@ -31,6 +31,7 @@ String transformQueryForExternalDatabase( const Names & column_names, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context); diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp index 513ec510d23..5e0bfdd5f2a 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include @@ -61,11 +63,15 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre visitor.visit(new_tree); const auto * query_node = new_tree->as(); - const auto & query_node_ast = query_node->toAST({ .add_cast_for_constants = false, .fully_qualified_identifiers = false }); + auto query_node_ast = query_node->toAST({ .add_cast_for_constants = false, .fully_qualified_identifiers = false }); + const IAST * ast = query_node_ast.get(); - const auto * union_ast = query_node_ast->as(); + if (const auto * ast_subquery = ast->as()) + ast = ast_subquery->children.at(0).get(); + + const auto * union_ast = ast->as(); if (!union_ast) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "QueryNode AST is not a ASTSelectWithUnionQuery"); + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "QueryNode AST ({}) is not a ASTSelectWithUnionQuery", query_node_ast->getID()); if (union_ast->list_of_selects->children.size() != 1) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "QueryNode AST is not a single ASTSelectQuery, got {}", union_ast->list_of_selects->children.size()); diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index c9e5c66fe4a..b02a0e79f9c 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -6,16 +6,18 @@ if (TARGET ch_contrib::hivemetastore) add_headers_and_sources(clickhouse_table_functions Hive) endif () -list(REMOVE_ITEM clickhouse_table_functions_sources +extract_into_parent_list(clickhouse_table_functions_sources dbms_sources ITableFunction.cpp TableFunctionView.cpp - TableFunctionFactory.cpp) -list(REMOVE_ITEM clickhouse_table_functions_headers + TableFunctionFactory.cpp +) +extract_into_parent_list(clickhouse_table_functions_headers dbms_headers ITableFunction.h TableFunctionView.h - TableFunctionFactory.h) + TableFunctionFactory.h +) -add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) +add_library(clickhouse_table_functions 
${clickhouse_table_functions_headers} ${clickhouse_table_functions_sources}) target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index fb7635181dc..ebebee13092 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -49,13 +49,14 @@ namespace DB actual_columns = parseColumnsListFromString(table_structure, context_); } - ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/) const { return actual_columns; } + ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/, bool /*is_insert_query*/) const { return actual_columns; } StoragePtr TableFunctionHive::executeImpl( const ASTPtr & /*ast_function_*/, ContextPtr context_, const std::string & table_name_, - ColumnsDescription /*cached_columns_*/) const + ColumnsDescription /*cached_columns_*/, + bool /*is_insert_query*/) const { const Settings & settings = context_->getSettings(); ParserExpression partition_by_parser; diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h index ec09a87a876..5e48be46ce1 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.h +++ b/src/TableFunctions/Hive/TableFunctionHive.h @@ -17,10 +17,10 @@ public: bool hasStaticStructure() const override { return true; } StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return storage_type_name; } - ColumnsDescription getActualTableStructure(ContextPtr) const override; + ColumnsDescription getActualTableStructure(ContextPtr, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; private: diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index df19e0ebad3..137e1dc27fe 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -34,15 +34,15 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte auto context_to_use = use_global_context ? 
context->getGlobalContext() : context; if (cached_columns.empty()) - return executeImpl(ast_function, context, table_name, std::move(cached_columns)); + return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query); - if (hasStaticStructure() && cached_columns == getActualTableStructure(context)) - return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns)); + if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query)) + return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query); auto this_table_function = shared_from_this(); auto get_storage = [=]() -> StoragePtr { - return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns); + return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns, is_insert_query); }; /// It will request actual table structure and create underlying storage lazily diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index fe71005cb9c..1946d8e8905 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -58,7 +58,7 @@ public: virtual void parseArguments(const ASTPtr & /*ast_function*/, ContextPtr /*context*/) {} /// Returns actual table structure probably requested from remote server, may fail - virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/) const = 0; + virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/, bool is_insert_query) const = 0; /// Check if table function needs a structure hint from SELECT query in case of /// INSERT INTO FUNCTION ... SELECT ... and INSERT INTO ... SELECT ... FROM table_function(...) @@ -76,7 +76,7 @@ public: /// because we cannot determine which column from table correspond to this virtual column. virtual std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const { return {}; } - virtual bool supportsReadingSubsetOfColumns() { return true; } + virtual bool supportsReadingSubsetOfColumns(const ContextPtr &) { return true; } /// Create storage according to the query. 
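Much of the TableFunctions churn in this patch is mechanical: a new `bool is_insert_query` parameter is threaded from the public `execute()` entry point into the two virtuals that every table function overrides. A stripped-down sketch of that shape, using generic stand-in types rather than the real ClickHouse interface:

```cpp
#include <iostream>
#include <memory>
#include <string>

/// Minimal stand-ins for the real types.
struct Columns { std::string description; };
struct Storage { std::string name; };

class ITableFunction
{
public:
    /// The public entry point decides the flag once and forwards it everywhere,
    /// so implementations never have to guess whether they are serving an INSERT.
    std::shared_ptr<Storage> execute(const std::string & table_name, bool is_insert_query) const
    {
        Columns columns = getActualTableStructure(is_insert_query);
        std::cout << "structure: " << columns.description << '\n';
        return executeImpl(table_name, std::move(columns), is_insert_query);
    }

    virtual ~ITableFunction() = default;

private:
    virtual Columns getActualTableStructure(bool is_insert_query) const = 0;
    virtual std::shared_ptr<Storage> executeImpl(const std::string & table_name, Columns columns, bool is_insert_query) const = 0;
};

class TableFunctionExample : public ITableFunction
{
private:
    Columns getActualTableStructure(bool is_insert_query) const override
    {
        return {is_insert_query ? "structure for INSERT" : "structure for SELECT"};
    }

    std::shared_ptr<Storage> executeImpl(const std::string & table_name, Columns, bool) const override
    {
        return std::make_shared<Storage>(Storage{table_name});
    }
};

int main()
{
    TableFunctionExample function;
    function.execute("t", /*is_insert_query=*/ true);
}
```

Elsewhere in the patch the flag feeds directly into construction decisions, for example `StorageAzureBlob::createClient(configuration, !is_insert_query)`.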
StoragePtr @@ -89,7 +89,7 @@ protected: private: virtual StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const = 0; virtual const char * getStorageTypeName() const = 0; }; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index a8329684ee6..7e81d6d21b7 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -17,7 +17,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_GET; + extern const int CLUSTER_DOESNT_EXIST; extern const int LOGICAL_ERROR; } @@ -59,7 +59,7 @@ protected: cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); if (!context->tryGetCluster(cluster_name)) - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); /// Just cut the first arg (cluster_name) and try to parse other table function arguments as is args.erase(args.begin()); diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index f87838cfb56..6d50e9138ff 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -26,7 +26,8 @@ protected: const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/) const override + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const override { ColumnsDescription columns; if (TableFunction::configuration.structure != "auto") @@ -42,7 +43,7 @@ protected: const char * getStorageTypeName() const override { return Storage::name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override { if (TableFunction::configuration.structure == "auto") { diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index a60ab70d570..b88af855309 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -32,9 +32,9 @@ String ITableFunctionFileLike::getFormatFromFirstArgument() return FormatFactory::instance().getFormatFromFileName(filename, true); } -bool ITableFunctionFileLike::supportsReadingSubsetOfColumns() +bool ITableFunctionFileLike::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); } void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -110,7 +110,7 @@ void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const S } } -StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { 
ColumnsDescription columns; if (structure != "auto") diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 8300cc27591..5fe86587797 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -27,7 +27,7 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns() override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; static size_t getMaxNumberOfArguments() { return 4; } @@ -42,13 +42,14 @@ protected: virtual String getFormatFromFirstArgument(); String filename; + String path_to_archive; String format = "auto"; String structure = "auto"; String compression_method = "auto"; ColumnsDescription structure_hint; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; virtual StoragePtr getStorage( const String & source, const String & format, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index 1fb0f392e33..59702259b35 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -61,7 +61,7 @@ void ITableFunctionXDBC::startBridgeIfNot(ContextPtr context) const } } -ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr context) const +ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { startBridgeIfNot(context); @@ -92,10 +92,10 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex return ColumnsDescription{columns}; } -StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { startBridgeIfNot(context); - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto result = std::make_shared( StorageID(getDatabaseName(), table_name), schema_name, remote_table_name, columns, ConstraintsDescription{}, String{}, context, helper); result->startup(); diff --git a/src/TableFunctions/ITableFunctionXDBC.h b/src/TableFunctions/ITableFunctionXDBC.h index 984a6a1957f..da0fa83033b 100644 --- a/src/TableFunctions/ITableFunctionXDBC.h +++ b/src/TableFunctions/ITableFunctionXDBC.h @@ -16,7 +16,7 @@ namespace DB class ITableFunctionXDBC : public ITableFunction { private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; /* A factory method to create bridge helper, that will assist in remote interaction */ virtual BridgeHelperPtr createBridgeHelper(ContextPtr context, @@ -24,7 
+24,7 @@ private: const std::string & connection_string_, bool use_connection_pooling_) const = 0; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index d2a96173491..c29bea2c5c7 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -39,15 +39,13 @@ namespace bool isConnectionString(const std::string & candidate) { - return candidate.starts_with("DefaultEndpointsProtocol"); + return !candidate.starts_with("http"); } } -StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) +void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { - StorageAzureBlob::Configuration configuration; - /// Supported signatures: /// /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) @@ -59,87 +57,80 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.blobs_paths = {configuration.blob_path}; - if (configuration.format == "auto" && get_format_from_file) + if (configuration.format == "auto") configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; } - - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool + else { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); - }; + if (engine_args.size() < 3 || engine_args.size() > 8) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + std::unordered_map engine_args_to_idx; + + configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + 
configuration.is_connection_string = isConnectionString(configuration.connection_url); + + configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); + configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + if (engine_args.size() == 4) { - configuration.format = fourth_arg; + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + } + else + { + configuration.structure = fourth_arg; + } } - else + else if (engine_args.size() == 5) { - configuration.structure = fourth_arg; + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + configuration.account_name = fourth_arg; + configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) + else if (engine_args.size() == 6) { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + { + configuration.account_name = fourth_arg; + configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); + if (is_format_arg(sixth_arg)) + configuration.format = sixth_arg; + else + configuration.structure = sixth_arg; + } } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format, compression and structure must be last arguments"); - } - else + else if (engine_args.size() == 7) { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], 
"format/account_name"); configuration.account_name = fourth_arg; configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); @@ -148,17 +139,9 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } - } - else if (engine_args.size() == 8) - { - - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else + else if (engine_args.size() == 8) { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); configuration.account_name = fourth_arg; configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); @@ -168,14 +151,12 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); } + + configuration.blobs_paths = {configuration.blob_path}; + + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; } void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -190,32 +171,112 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, auto & args = args_func.at(0)->children; - configuration = parseArgumentsImpl(args, context); + parseArgumentsImpl(args, context); } -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context) const +void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + + auto structure_literal = std::make_shared(structure); + + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + + if (args.size() == 3) + { + /// Add format=auto & compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + } + args.push_back(structure_literal); + } + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 7) + { + args.push_back(structure_literal); + } + else if (args.size() == 8) + { + args.back() = structure_literal; + } + } +} + +ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const { if (configuration.structure == "auto") { context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings)); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); } return parseColumnsListFromString(configuration.structure, context); } -bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns() +bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); } -StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); ColumnsDescription columns; @@ -234,6 +295,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct String{}, /// No format_settings for table function Azure std::nullopt, + /* distributed_processing */ false, nullptr); storage->startup(); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h index 0ac3f9771c7..e1759740f7f 
100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.h @@ -13,13 +13,23 @@ namespace DB class Context; -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in AzureBlob. +/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. */ class TableFunctionAzureBlobStorage : public ITableFunction { public: static constexpr auto name = "azureBlobStorage"; - static constexpr auto signature = "- connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]\n"; + + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; static size_t getMaxNumberOfArguments() { return 8; } @@ -39,14 +49,16 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns() override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { return {"_path", "_file"}; } - static StorageAzureBlob::Configuration parseArgumentsImpl(ASTs & args, const ContextPtr & context, bool get_format_from_file = true); + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); protected: @@ -54,11 +66,12 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Azure"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable StorageAzureBlob::Configuration configuration; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp new file mode 100644 index 00000000000..eee585967c2 --- /dev/null +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -0,0 +1,85 @@ +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include + +#include "registerTableFunctions.h" + +#include + + +namespace DB +{ + +StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( + const ASTPtr & 
/*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +{ + StoragePtr storage; + ColumnsDescription columns; + bool structure_argument_was_provided = configuration.structure != "auto"; + + if (structure_argument_was_provided) + { + columns = parseColumnsListFromString(configuration.structure, context); + } + else if (!structure_hint.empty()) + { + columns = structure_hint; + } + + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); + auto settings = StorageAzureBlob::createSettings(context); + + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared( + configuration, + std::make_unique(table_name, std::move(client), std::move(settings)), + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + /* comment */String{}, + /* format_settings */std::nullopt, /// No format_settings + /* distributed_processing */ true, + /*partition_by_=*/nullptr); + } + else + { + storage = std::make_shared( + cluster_name, + configuration, + std::make_unique(table_name, std::move(client), std::move(settings)), + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + context, + structure_argument_was_provided); + } + + storage->startup(); + + return storage; +} + + +void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false} + ); +} + + +} + +#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h new file mode 100644 index 00000000000..58f79328f63 --- /dev/null +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h @@ -0,0 +1,55 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include + + +namespace DB +{ + +class Context; + +/** + * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) + * A table function, which allows to process many files from Azure Blob Storage on a specific cluster + * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks + * in Azure Blob Storage file path and dispatch each file dynamically. + * On worker node it asks initiator about next task to process, processes it. + * This is repeated until the tasks are finished. 
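The comment above describes pull-based scheduling: the initiator owns the list of matched blobs and each worker keeps asking for the next one until none are left. A self-contained sketch of that loop using threads and a mutex-guarded index; the real implementation coordinates over the network, and none of these types are ClickHouse's:

```cpp
#include <iostream>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <vector>

/// The "initiator": hands out blob paths one by one until none are left.
class TaskDispatcher
{
public:
    explicit TaskDispatcher(std::vector<std::string> tasks_) : tasks(std::move(tasks_)) {}

    std::optional<std::string> nextTask()
    {
        std::lock_guard lock(mutex);
        if (next_task == tasks.size())
            return std::nullopt; /// every file has been dispatched
        return tasks[next_task++];
    }

private:
    std::vector<std::string> tasks;
    size_t next_task = 0;
    std::mutex mutex;
};

int main()
{
    TaskDispatcher dispatcher({"data/part-0.csv", "data/part-1.csv", "data/part-2.csv"});

    /// Two "workers": each repeatedly asks for the next file and processes it.
    auto worker = [&](int id)
    {
        while (auto task = dispatcher.nextTask())
            std::cout << "worker " + std::to_string(id) + " reads " + *task + "\n";
    };

    std::thread w1(worker, 1);
    std::thread w2(worker, 2);
    w1.join();
    w2.join();
}
```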
+ */ +class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster +{ +public: + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; + + String getName() const override + { + return name; + } + + String getSignature() const override + { + return signature; + } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } +}; + +} + +#endif diff --git a/src/TableFunctions/TableFunctionDictionary.cpp b/src/TableFunctions/TableFunctionDictionary.cpp index 90db9550a72..f0060acb411 100644 --- a/src/TableFunctions/TableFunctionDictionary.cpp +++ b/src/TableFunctions/TableFunctionDictionary.cpp @@ -43,7 +43,7 @@ void TableFunctionDictionary::parseArguments(const ASTPtr & ast_function, Contex dictionary_name = checkAndGetLiteralArgument(args[0], "dictionary_name"); } -ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { const ExternalDictionariesLoader & external_loader = context->getExternalDictionariesLoader(); std::string resolved_name = external_loader.resolveDictionaryName(dictionary_name, context->getCurrentDatabase()); @@ -76,10 +76,10 @@ ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr c } StoragePtr TableFunctionDictionary::executeImpl( - const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription) const + const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const { StorageID dict_id(getDatabaseName(), table_name); - auto dictionary_table_structure = getActualTableStructure(context); + auto dictionary_table_structure = getActualTableStructure(context, is_insert_query); auto result = std::make_shared( dict_id, dictionary_name, std::move(dictionary_table_structure), String{}, StorageDictionary::Location::Custom, context); diff --git a/src/TableFunctions/TableFunctionDictionary.h b/src/TableFunctions/TableFunctionDictionary.h index cc184a32a17..d0beb292fe1 100644 --- a/src/TableFunctions/TableFunctionDictionary.h +++ b/src/TableFunctions/TableFunctionDictionary.h @@ -18,9 +18,9 @@ public: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Dictionary"; } diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 66e7ce7592c..5a64a988156 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -120,12 +120,12 @@ void 
TableFunctionExecutable::parseArguments(const ASTPtr & ast_function, Contex } } -ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { auto storage_id = StorageID(getDatabaseName(), table_name); auto global_context = context->getGlobalContext(); @@ -135,7 +135,7 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, if (settings_query != nullptr) settings.applyChanges(settings_query->as()->changes); - auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context), ConstraintsDescription{}); + auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context, is_insert_query), ConstraintsDescription{}); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionExecutable.h b/src/TableFunctions/TableFunctionExecutable.h index 2d9f86e14e6..aa595312fe4 100644 --- a/src/TableFunctions/TableFunctionExecutable.h +++ b/src/TableFunctions/TableFunctionExecutable.h @@ -24,11 +24,11 @@ public: bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Executable"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index aae93c8b596..f127979d92a 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -91,7 +91,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt query = std::move(explain_query); } -ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); ColumnsDescription columns_description; @@ -123,7 +123,7 @@ static Block executeMonoBlock(QueryPipeline & pipeline) } StoragePtr TableFunctionExplain::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { 
/// To support settings inside explain subquery. auto mutable_context = Context::createCopy(context); @@ -132,7 +132,7 @@ StoragePtr TableFunctionExplain::executeImpl( Block block = executeMonoBlock(blockio.pipeline); StorageID storage_id(getDatabaseName(), table_name); - auto storage = std::make_shared(storage_id, getActualTableStructure(context), std::move(block)); + auto storage = std::make_shared(storage_id, getActualTableStructure(context, is_insert_query), std::move(block)); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionExplain.h b/src/TableFunctions/TableFunctionExplain.h index 99d3e52ee68..2eb7e35d0b5 100644 --- a/src/TableFunctions/TableFunctionExplain.h +++ b/src/TableFunctions/TableFunctionExplain.h @@ -17,7 +17,7 @@ public: std::string getName() const override { return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Explain"; } @@ -25,7 +25,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; InterpreterExplainQuery getInterpreter(ContextPtr context) const; diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index 76108f1cdd4..ce3daff0785 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -41,7 +41,7 @@ TableFunctionPtr TableFunctionFactory::get( { auto hints = getHints(table_function->name); if (!hints.empty()) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. Maybe you meant: {}", table_function->name , toString(hints)); + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. 
Maybe you meant: {}", table_function->name, toString(hints)); else throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}", table_function->name); } diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 0e49f26db40..f75c56e6523 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -25,6 +25,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr if (context->getApplicationType() != Context::ApplicationType::LOCAL) { ITableFunctionFileLike::parseFirstArguments(arg, context); + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); return; } @@ -39,6 +40,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr fd = STDOUT_FILENO; else if (filename == "stderr") fd = STDERR_FILENO; + else + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { @@ -76,22 +79,32 @@ StoragePtr TableFunctionFile::getStorage(const String & source, ConstraintsDescription{}, String{}, global_context->getSettingsRef().rename_files_after_processing, + path_to_archive, }; + if (fd >= 0) return std::make_shared(fd, args); return std::make_shared(source, global_context->getUserFilesPath(), args); } -ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { if (fd >= 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName()); size_t total_bytes_to_read = 0; - Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); - return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); + + Strings paths; + std::optional archive_info; + if (path_to_archive.empty()) + paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); + else + archive_info + = StorageFile::getArchiveInfo(path_to_archive, filename, context->getUserFilesPath(), context, total_bytes_to_read); + + return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, archive_info); } diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 797948cad03..439ae87b4ae 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -20,7 +20,7 @@ public: return name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index 2a46f839bbe..3afe7ffde58 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -52,14 +52,11 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr structure = checkAndGetLiteralArgument(args[1], "structure"); } -ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const +ColumnsDescription 
TableFunctionFormat::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { - ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &) - { - return std::make_unique(data); - }; + SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, false, context); } return parseColumnsListFromString(structure, context); @@ -98,9 +95,9 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont return concatenateBlocks(blocks); } -StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); Block res_block = parseData(columns, context); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); diff --git a/src/TableFunctions/TableFunctionFormat.h b/src/TableFunctions/TableFunctionFormat.h index d64ab14cb64..e20e8b6ea4b 100644 --- a/src/TableFunctions/TableFunctionFormat.h +++ b/src/TableFunctions/TableFunctionFormat.h @@ -18,10 +18,10 @@ public: bool hasStaticStructure() const override { return false; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Values"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; Block parseData(ColumnsDescription columns, ContextPtr context) const; diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index 08059796660..c6a9154cc66 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -97,7 +97,7 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co } } -ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -113,9 +113,9 @@ ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextP return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - ColumnsDescription columns = getActualTableStructure(context); + 
ColumnsDescription columns = getActualTableStructure(context, is_insert_query); auto res = std::make_shared( StorageID(getDatabaseName(), table_name), columns, String{}, max_array_length, max_string_length, random_seed); res->startup(); diff --git a/src/TableFunctions/TableFunctionGenerateRandom.h b/src/TableFunctions/TableFunctionGenerateRandom.h index 584d65311f4..a5d11ce0af6 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.h +++ b/src/TableFunctions/TableFunctionGenerateRandom.h @@ -19,10 +19,10 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "GenerateRandom"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure = "auto"; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 7aab55b48c9..a8e2108fda8 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -28,7 +28,7 @@ StoragePtr TableFunctionHDFS::getStorage( compression_method_); } -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index eec0a05fe8d..a7eb5daa440 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -34,7 +34,7 @@ public: return signature; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index 4941241acae..658a55c6fc4 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -43,7 +43,7 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "structure"); } -ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -58,9 +58,9 @@ ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr contex return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionInput::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionInput::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const 
std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), getActualTableStructure(context)); + auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), getActualTableStructure(context, is_insert_query)); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionInput.h b/src/TableFunctions/TableFunctionInput.h index 8e7b34cb829..3164ce43eef 100644 --- a/src/TableFunctions/TableFunctionInput.h +++ b/src/TableFunctions/TableFunctionInput.h @@ -20,10 +20,10 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Input"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure; diff --git a/src/TableFunctions/TableFunctionMeiliSearch.cpp b/src/TableFunctions/TableFunctionMeiliSearch.cpp index 41ae5bb9ee2..01840a80262 100644 --- a/src/TableFunctions/TableFunctionMeiliSearch.cpp +++ b/src/TableFunctions/TableFunctionMeiliSearch.cpp @@ -8,13 +8,13 @@ namespace DB { StoragePtr TableFunctionMeiliSearch::executeImpl( - const ASTPtr & /* ast_function */, ContextPtr /*context*/, const String & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /* ast_function */, ContextPtr /*context*/, const String & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { return std::make_shared( StorageID(getDatabaseName(), table_name), configuration.value(), ColumnsDescription{}, ConstraintsDescription{}, String{}); } -ColumnsDescription TableFunctionMeiliSearch::getActualTableStructure(ContextPtr /* context */) const +ColumnsDescription TableFunctionMeiliSearch::getActualTableStructure(ContextPtr /* context */, bool /*is_insert_query*/) const { return StorageMeiliSearch::getTableStructureFromData(configuration.value()); } diff --git a/src/TableFunctions/TableFunctionMeiliSearch.h b/src/TableFunctions/TableFunctionMeiliSearch.h index 86be944ab12..a127809a9c5 100644 --- a/src/TableFunctions/TableFunctionMeiliSearch.h +++ b/src/TableFunctions/TableFunctionMeiliSearch.h @@ -13,11 +13,11 @@ public: private: StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "meilisearch"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional configuration; diff --git a/src/TableFunctions/TableFunctionMerge.cpp 
b/src/TableFunctions/TableFunctionMerge.cpp index 586cee54085..599953a1add 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -118,7 +118,7 @@ const TableFunctionMerge::DBToTableSetMap & TableFunctionMerge::getSourceDatabas return *source_databases_and_tables; } -ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { for (const auto & db_with_tables : getSourceDatabasesAndTables(context)) { @@ -134,11 +134,11 @@ ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr contex } -StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { auto res = std::make_shared( StorageID(getDatabaseName(), table_name), - getActualTableStructure(context), + getActualTableStructure(context, is_insert_query), String{}, source_database_name_or_regexp, database_is_regexp, diff --git a/src/TableFunctions/TableFunctionMerge.h b/src/TableFunctions/TableFunctionMerge.h index 3439056deda..8cc5119978a 100644 --- a/src/TableFunctions/TableFunctionMerge.h +++ b/src/TableFunctions/TableFunctionMerge.h @@ -17,13 +17,13 @@ public: std::string getName() const override { return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Merge"; } using TableSet = std::set; using DBToTableSetMap = std::map; const DBToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static TableSet getMatchedTablesWithAccess(const String & database_name, const String & table_regexp, const ContextPtr & context); diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index 31dd64f8254..5c7c1d98cdf 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -27,9 +27,9 @@ namespace ErrorCodes StoragePtr TableFunctionMongoDB::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto storage = std::make_shared( StorageID(configuration->database, table_name), configuration->host, @@ -46,7 +46,7 @@ StoragePtr TableFunctionMongoDB::executeImpl(const ASTPtr & 
/*ast_function*/, return storage; } -ColumnsDescription TableFunctionMongoDB::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMongoDB::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionMongoDB.h b/src/TableFunctions/TableFunctionMongoDB.h index b5033b2d654..c2c15cabe5a 100644 --- a/src/TableFunctions/TableFunctionMongoDB.h +++ b/src/TableFunctions/TableFunctionMongoDB.h @@ -17,11 +17,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "MongoDB"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional configuration; diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 1705dfcbfa5..03bd2264551 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -57,7 +57,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr pool.emplace(createMySQLPoolWithFailover(*configuration, mysql_settings)); } -ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return StorageMySQL::getTableStructureFromData(*pool, configuration->database, configuration->table, context); } @@ -66,7 +66,8 @@ StoragePtr TableFunctionMySQL::executeImpl( const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/) const + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const { auto res = std::make_shared( StorageID(getDatabaseName(), table_name), diff --git a/src/TableFunctions/TableFunctionMySQL.h b/src/TableFunctions/TableFunctionMySQL.h index 5a230530bc4..04f619f5f4b 100644 --- a/src/TableFunctions/TableFunctionMySQL.h +++ b/src/TableFunctions/TableFunctionMySQL.h @@ -23,10 +23,10 @@ public: return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "MySQL"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable std::optional pool; diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index d25b9e15aa7..57911e16d4b 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -32,14 +32,14 @@ void TableFunctionNull::parseArguments(const ASTPtr & 
ast_function, ContextPtr c structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context), "structure"); } -ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure != "auto") return parseColumnsListFromString(structure, context); return default_structure; } -StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { ColumnsDescription columns; if (structure != "auto") diff --git a/src/TableFunctions/TableFunctionNull.h b/src/TableFunctions/TableFunctionNull.h index 4fece9e6da9..e80552d4cff 100644 --- a/src/TableFunctions/TableFunctionNull.h +++ b/src/TableFunctions/TableFunctionNull.h @@ -23,11 +23,11 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Null"; } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; String structure = "auto"; ColumnsDescription structure_hint; diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index ba7a4dc4b36..d6cf50bc7d6 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -23,14 +23,14 @@ namespace ErrorCodes template -ColumnsDescription TableFunctionNumbers::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionNumbers::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 return ColumnsDescription{{{"number", std::make_shared()}}}; } template -StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { diff --git a/src/TableFunctions/TableFunctionNumbers.h b/src/TableFunctions/TableFunctionNumbers.h index 0a2f3eb863e..e380f40f7b2 100644 --- a/src/TableFunctions/TableFunctionNumbers.h +++ b/src/TableFunctions/TableFunctionNumbers.h @@ -19,12 +19,12 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription 
cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemNumbers"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; }; diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 2b17a1b2c2b..322e0df7c15 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { auto result = std::make_shared( StorageID(getDatabaseName(), table_name), @@ -38,7 +38,7 @@ StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, } -ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return StoragePostgreSQL::getTableStructureFromData(connection_pool, configuration->table, configuration->schema, context); } diff --git a/src/TableFunctions/TableFunctionPostgreSQL.h b/src/TableFunctions/TableFunctionPostgreSQL.h index 9f10e1c180e..f7d77567dd4 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.h +++ b/src/TableFunctions/TableFunctionPostgreSQL.h @@ -20,11 +20,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "PostgreSQL"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; postgres::PoolWithFailoverPtr connection_pool; diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index ec659ae61e0..0b7433845b4 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -25,9 +25,9 @@ namespace ErrorCodes } StoragePtr TableFunctionRedis::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); StorageInMemoryMetadata metadata; metadata.setColumns(columns); @@ -39,7 +39,7 @@ StoragePtr TableFunctionRedis::executeImpl( return storage; } -ColumnsDescription TableFunctionRedis::getActualTableStructure(ContextPtr context) const 
+ColumnsDescription TableFunctionRedis::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionRedis.h b/src/TableFunctions/TableFunctionRedis.h index b985a89e3d7..a7fc0df0a15 100644 --- a/src/TableFunctions/TableFunctionRedis.h +++ b/src/TableFunctions/TableFunctionRedis.h @@ -19,11 +19,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const String & table_name, ColumnsDescription cached_columns) const override; + const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Redis"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; RedisConfiguration configuration; diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 4143014a7b3..1f42ce4ba30 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -37,7 +37,10 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr String cluster_name; String cluster_description; - String database, table, username = "default", password; + String database = "system"; + String table = "one"; /// The table containing one row is used by default for queries without explicit table specification. + String username = "default"; + String password; if (args_func.size() != 1) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -86,7 +89,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr else { /// Supported signatures: - /// + /// remote('addresses_expr') /// remote('addresses_expr', db.table) /// remote('addresses_expr', 'db', 'table') /// remote('addresses_expr', db.table, 'user') @@ -102,6 +105,8 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr /// /// remoteSecure() - same as remote() /// + /// cluster() + /// cluster('cluster_name') /// cluster('cluster_name', db.table) /// cluster('cluster_name', 'db', 'table') /// cluster('cluster_name', db.table, sharding_key) @@ -109,7 +114,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr /// /// clusterAllReplicas() - same as cluster() - if (args.size() < 2 || args.size() > max_args) + if ((!is_cluster_function && args.empty()) || args.size() > max_args) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); size_t arg_num = 0; @@ -128,8 +133,15 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (is_cluster_function) { - args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); + if (!args.empty()) + { + args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); + cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); + } + else + { + cluster_name = "default"; + } } else { @@ -141,44 +153,49 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr } ++arg_num; - const auto * function = args[arg_num]->as(); - if (function && 
TableFunctionFactory::instance().isTableFunctionName(function->name)) - { - remote_table_function_ptr = args[arg_num]; - ++arg_num; - } - else - { - args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); - database = checkAndGetLiteralArgument(args[arg_num], "database"); - ++arg_num; - - auto qualified_name = QualifiedTableName::parseFromString(database); - if (qualified_name.database.empty()) + /// Names of database and table is not necessary. + if (arg_num < args.size()) + { + const auto * function = args[arg_num]->as(); + if (function && TableFunctionFactory::instance().isTableFunctionName(function->name)) { - if (arg_num >= args.size()) + remote_table_function_ptr = args[arg_num]; + ++arg_num; + } + else + { + args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); + database = checkAndGetLiteralArgument(args[arg_num], "database"); + + ++arg_num; + + auto qualified_name = QualifiedTableName::parseFromString(database); + if (qualified_name.database.empty()) { - throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arg_num >= args.size()) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table name was not found in function arguments. {}", static_cast(help_message)); + } + else + { + std::swap(qualified_name.database, qualified_name.table); + args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); + qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); + ++arg_num; + } } - else + + database = std::move(qualified_name.database); + table = std::move(qualified_name.table); + + /// Cluster function may have sharding key for insert + if (is_cluster_function && arg_num < args.size()) { - std::swap(qualified_name.database, qualified_name.table); - args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); + sharding_key = args[arg_num]; ++arg_num; } } - - database = std::move(qualified_name.database); - table = std::move(qualified_name.table); - - /// Cluster function may have sharding key for insert - if (is_cluster_function && arg_num < args.size()) - { - sharding_key = args[arg_num]; - ++arg_num; - } } /// Username and password parameters are prohibited in cluster version of the function @@ -203,15 +220,19 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr ++arg_num; } - if (arg_num < args.size() && !sharding_key) + if (arg_num < args.size()) { + if (sharding_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arguments `user` and `password` should be string literals (in single quotes)"); sharding_key = args[arg_num]; ++arg_num; } } if (arg_num < args.size()) + { throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } } if (!cluster_name.empty()) @@ -264,7 +285,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr secure, /* priority= */ Priority{1}, /* cluster_name= */ "", - /* password= */ "" + /* cluster_secret= */ "" }; cluster = std::make_shared(context->getSettingsRef(), names, params); } @@ -276,12 +297,12 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr remote_table_id.table_name = table; } -StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const +StoragePtr 
TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { /// StorageDistributed supports mismatching structure of remote table, so we can use outdated structure for CREATE ... AS remote(...) /// without additional conversion in StorageTableFunctionProxy if (cached_columns.empty()) - cached_columns = getActualTableStructure(context); + cached_columns = getActualTableStructure(context, is_insert_query); assert(cluster); StoragePtr res = remote_table_function_ptr @@ -318,7 +339,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con return res; } -ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { assert(cluster); return getStructureOfRemoteTable(*cluster, remote_table_id, context, remote_table_function_ptr); @@ -329,11 +350,13 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ { is_cluster_function = (name == "cluster" || name == "clusterAllReplicas"); help_message = PreformattedMessage::create( - "Table function '{}' requires from 2 to {} parameters: " - ", , {}", + "Table function '{}' requires from {} to {} parameters: " + "{}", name, + is_cluster_function ? 0 : 1, is_cluster_function ? 4 : 6, - is_cluster_function ? " [, sharding_key]" : " [, username[, password], sharding_key]"); + is_cluster_function ? "[, , ] [, sharding_key]" + : " [, , ] [, username[, password], sharding_key]"); } void registerTableFunctionRemote(TableFunctionFactory & factory) diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 32039d1e6a8..0f75bf2b854 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -22,13 +22,13 @@ public: std::string getName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; bool needStructureConversion() const override { return false; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Distributed"; } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 0f3078b1ca6..df9e5afcaff 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -58,6 +58,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) { StorageS3::processNamedCollectionResult(configuration, *named_collection); + if (configuration.format == "auto") + { + String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); + configuration.format = FormatFactory::instance().getFormatFromFileName(file_path, true); + } } else { @@ -152,7 +157,8 @@ void 
TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + String url = checkAndGetLiteralArgument(args[0], "url"); + configuration.url = S3::URI(url); if (args_to_idx.contains("format")) { @@ -176,12 +182,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); configuration.auth_settings.no_sign_request = no_sign_request; + + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); } configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.uri.getPath(), true); } void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -313,7 +319,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } } -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (configuration.structure == "auto") { @@ -325,12 +331,12 @@ ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) return parseColumnsListFromString(configuration.structure, context); } -bool TableFunctionS3::supportsReadingSubsetOfColumns() +bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); } -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { S3::URI s3_uri (configuration.url); diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index d308f469236..a38ea5ba56b 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -47,7 +47,7 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns() override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { @@ -64,11 +64,12 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "S3"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable StorageS3::Configuration configuration; diff --git 
a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 1d93132c411..ce96f7f580b 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -17,7 +17,7 @@ namespace DB StoragePtr TableFunctionS3Cluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { StoragePtr storage; ColumnsDescription columns; diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 459ff144f02..4fe25079cf4 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -52,7 +52,8 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "S3Cluster"; } }; diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index a9831363bd9..27e6fcf1fd1 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), sqlite_db, @@ -42,7 +42,7 @@ StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, } -ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */) const +ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */, bool /*is_insert_query*/) const { return StorageSQLite::getTableStructureFromData(sqlite_db, remote_table_name); } diff --git a/src/TableFunctions/TableFunctionSQLite.h b/src/TableFunctions/TableFunctionSQLite.h index fded5646b39..74318f058a9 100644 --- a/src/TableFunctions/TableFunctionSQLite.h +++ b/src/TableFunctions/TableFunctionSQLite.h @@ -18,11 +18,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SQLite"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String database_path, remote_table_name; diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 4ed204a2af3..8d5a023fc3b 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -118,7 +118,7 @@ StoragePtr TableFunctionURL::getStorage( configuration.http_method); } -ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context) const +ColumnsDescription 
TableFunctionURL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 021eb71df53..5e58a36dde9 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -32,7 +32,7 @@ public: return signature; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context); diff --git a/src/TableFunctions/TableFunctionURLCluster.cpp b/src/TableFunctions/TableFunctionURLCluster.cpp index c94943db758..a2949278155 100644 --- a/src/TableFunctions/TableFunctionURLCluster.cpp +++ b/src/TableFunctions/TableFunctionURLCluster.cpp @@ -38,7 +38,7 @@ StoragePtr TableFunctionURLCluster::getStorage( format, compression_method, StorageID(getDatabaseName(), table_name), - getActualTableStructure(context), + getActualTableStructure(context, /* is_insert_query */ true), ConstraintsDescription{}, configuration, structure != "auto"); diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index cf0e20c624c..42a19874704 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -119,14 +119,14 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr structure = ColumnsDescription(names_and_types); } -ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { return structure; } -StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); Block sample_block; for (const auto & name_type : columns.getOrdinary()) diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h index 61ce5158086..7c87bff835e 100644 --- a/src/TableFunctions/TableFunctionValues.h +++ b/src/TableFunctions/TableFunctionValues.h @@ -14,10 +14,10 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Values"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context); 
diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 6b50e7e0611..2a50fb2d006 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -41,7 +41,7 @@ void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr / throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires a query argument.", getName()); } -ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { assert(create.select); assert(create.children.size() == 1); @@ -58,9 +58,9 @@ ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context } StoragePtr TableFunctionView::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), create, columns, ""); res->startup(); return res; diff --git a/src/TableFunctions/TableFunctionView.h b/src/TableFunctions/TableFunctionView.h index bbf072655ed..c679a1f315d 100644 --- a/src/TableFunctions/TableFunctionView.h +++ b/src/TableFunctions/TableFunctionView.h @@ -21,7 +21,7 @@ public: const ASTSelectWithUnionQuery & getSelectQuery() const; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "View"; } @@ -29,7 +29,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; ASTCreateQuery create; }; diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.cpp b/src/TableFunctions/TableFunctionViewIfPermitted.cpp index 12762e24f7e..d7944df1b28 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.cpp +++ b/src/TableFunctions/TableFunctionViewIfPermitted.cpp @@ -55,16 +55,16 @@ void TableFunctionViewIfPermitted::parseArguments(const ASTPtr & ast_function, C else_table_function = TableFunctionFactory::instance().get(else_ast, context); } -ColumnsDescription TableFunctionViewIfPermitted::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionViewIfPermitted::getActualTableStructure(ContextPtr context, bool is_insert_query) const { - return else_table_function->getActualTableStructure(context); + return else_table_function->getActualTableStructure(context, is_insert_query); } StoragePtr TableFunctionViewIfPermitted::executeImpl( - const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, ColumnsDescription /* cached_columns */) const + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, ColumnsDescription /* cached_columns */, bool 
is_insert_query) const { StoragePtr storage; - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); if (isPermitted(context, columns)) { diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.h b/src/TableFunctions/TableFunctionViewIfPermitted.h index 9fdb34f30ab..bee4e15bfa5 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.h +++ b/src/TableFunctions/TableFunctionViewIfPermitted.h @@ -20,7 +20,7 @@ public: const ASTSelectWithUnionQuery & getSelectQuery() const; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "ViewIfPermitted"; } @@ -28,7 +28,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; bool isPermitted(const ContextPtr & context, const ColumnsDescription & else_columns) const; diff --git a/src/TableFunctions/TableFunctionZeros.cpp b/src/TableFunctions/TableFunctionZeros.cpp index 3c487362e1f..eb93626590e 100644 --- a/src/TableFunctions/TableFunctionZeros.cpp +++ b/src/TableFunctions/TableFunctionZeros.cpp @@ -20,14 +20,14 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; template -ColumnsDescription TableFunctionZeros::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionZeros::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 return ColumnsDescription{{{"zero", std::make_shared()}}}; } template -StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { diff --git a/src/TableFunctions/TableFunctionZeros.h b/src/TableFunctions/TableFunctionZeros.h index eef1577673e..07d523ee37c 100644 --- a/src/TableFunctions/TableFunctionZeros.h +++ b/src/TableFunctions/TableFunctionZeros.h @@ -19,12 +19,12 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemZeros"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; }; diff --git a/src/TableFunctions/registerTableFunctions.cpp 
b/src/TableFunctions/registerTableFunctions.cpp index eb6e0372223..6dd29551f48 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -75,6 +75,7 @@ void registerTableFunctions() #if USE_AZURE_BLOB_STORAGE registerTableFunctionAzureBlobStorage(factory); + registerTableFunctionAzureBlobStorageCluster(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 758e193e88f..db212698e65 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -72,6 +72,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory); #if USE_AZURE_BLOB_STORAGE void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); +void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); #endif void registerTableFunctions(); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index ae6305705c2..6e636e580c2 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -162,3 +162,8 @@ endif () if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() +if (TARGET ch_contrib::libarchive) + set(USE_LIBARCHIVE 1) +endif() + +set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 02f70c8a6df..080cd3f2677 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,44 +1,15 @@ test_access_for_functions/test.py::test_access_rights_for_function -test_backward_compatibility/test_normalized_count_comparison.py::test_select_aggregate_alias_column test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_ddl/test.py::test_default_database[configs] test_distributed_ddl/test.py::test_default_database[configs_secure] test_distributed_ddl/test.py::test_on_server_fail[configs] test_distributed_ddl/test.py::test_on_server_fail[configs_secure] test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[pass-foo] 
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[pass-foo] test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors test_distributed_load_balancing/test.py::test_load_balancing_default test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority] test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority_negative] test_distributed_load_balancing/test.py::test_load_balancing_round_robin -test_backward_compatibility/test.py::test_backward_compatability1 -test_backward_compatibility/test_aggregate_fixed_key.py::test_two_level_merge -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_avg -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[1000] -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[500000] -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[1000] -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000] -test_backward_compatibility/test_ip_types_binary_compatibility.py::test_ip_types_binary_compatibility -test_backward_compatibility/test_select_aggregate_alias_column.py::test_select_aggregate_alias_column -test_backward_compatibility/test_short_strings_aggregation.py::test_backward_compatability test_mask_sensitive_info/test.py::test_encryption_functions test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed @@ -87,7 +58,6 @@ test_row_policy/test.py::test_users_xml_is_readonly test_row_policy/test.py::test_with_prewhere test_row_policy/test.py::test_with_prewhere test_settings_constraints_distributed/test.py::test_select_clamps_settings -test_backward_compatibility/test_cte_distributed.py::test_cte_distributed test_compression_codec_read/test.py::test_default_codec_read test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary test_distributed_type_object/test.py::test_distributed_type_object @@ -98,9 +68,6 @@ test_storage_postgresql/test.py::test_postgres_select_insert test_storage_rabbitmq/test.py::test_rabbitmq_materialized_view test_system_merges/test.py::test_mutation_simple[] test_system_merges/test.py::test_mutation_simple[replicated] -test_backward_compatibility/test_insert_profile_events.py::test_new_client_compatible -test_backward_compatibility/test_insert_profile_events.py::test_old_client_compatible -test_backward_compatibility/test_vertical_merges_from_compact_parts.py::test_vertical_merges_from_compact_parts test_disk_over_web_server/test.py::test_cache[node2] test_disk_over_web_server/test.py::test_incorrect_usage test_disk_over_web_server/test.py::test_replicated_database @@ -108,96 +75,24 @@ test_disk_over_web_server/test.py::test_unavailable_server test_disk_over_web_server/test.py::test_usage[node2] test_distributed_backward_compatability/test.py::test_distributed_in_tuple test_executable_table_function/test.py::test_executable_function_input_python 
-test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_different_version_nodes -test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_distributed_table test_settings_profile/test.py::test_show_profiles test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster -test_backward_compatibility/test_functions.py::test_aggregate_states -test_backward_compatibility/test_functions.py::test_string_functions -test_default_compression_codec/test.py::test_default_codec_for_compact_parts -test_default_compression_codec/test.py::test_default_codec_multiple -test_default_compression_codec/test.py::test_default_codec_single -test_default_compression_codec/test.py::test_default_codec_version_update test_postgresql_protocol/test.py::test_python_client -test_quota/test.py::test_add_remove_interval -test_quota/test.py::test_add_remove_quota -test_quota/test.py::test_consumption_of_show_clusters -test_quota/test.py::test_consumption_of_show_databases -test_quota/test.py::test_consumption_of_show_privileges -test_quota/test.py::test_consumption_of_show_processlist -test_quota/test.py::test_consumption_of_show_tables -test_quota/test.py::test_dcl_introspection -test_quota/test.py::test_dcl_management -test_quota/test.py::test_exceed_quota -test_quota/test.py::test_query_inserts -test_quota/test.py::test_quota_from_users_xml -test_quota/test.py::test_reload_users_xml_by_timer -test_quota/test.py::test_simpliest_quota -test_quota/test.py::test_tracking_quota -test_quota/test.py::test_users_xml_is_readonly -test_replicated_merge_tree_compatibility/test.py::test_replicated_merge_tree_defaults_compatibility -test_polymorphic_parts/test.py::test_different_part_types_on_replicas[polymorphic_table_wide-Wide] -test_old_versions/test.py::test_client_is_older_than_server -test_polymorphic_parts/test.py::test_polymorphic_parts_non_adaptive -test_old_versions/test.py::test_server_is_older_than_client -test_polymorphic_parts/test.py::test_compact_parts_only -test_polymorphic_parts/test.py::test_different_part_types_on_replicas[polymorphic_table_compact-Compact] -test_polymorphic_parts/test.py::test_polymorphic_parts_index -test_old_versions/test.py::test_distributed_query_initiator_is_older_than_shard -test_polymorphic_parts/test.py::test_polymorphic_parts_basics[first_node1-second_node1] -test_polymorphic_parts/test.py::test_polymorphic_parts_basics[first_node0-second_node0] -test_ttl_replicated/test.py::test_ttl_table[DELETE] -test_ttl_replicated/test.py::test_ttl_columns -test_ttl_replicated/test.py::test_ttl_compatibility[node_left2-node_right2-2] -test_ttl_replicated/test.py::test_ttl_table[] -test_version_update/test.py::test_aggregate_function_versioning_server_upgrade -test_version_update/test.py::test_aggregate_function_versioning_fetch_data_from_old_to_new_server -test_ttl_replicated/test.py::test_ttl_double_delete_rule_returns_error -test_ttl_replicated/test.py::test_ttl_alter_delete[test_ttl_alter_delete] -test_ttl_replicated/test.py::test_ttl_alter_delete[test_ttl_alter_delete_replicated] -test_ttl_replicated/test.py::test_ttl_compatibility[node_left0-node_right0-0] -test_version_update/test.py::test_modulo_partition_key_issue_23508 -test_ttl_replicated/test.py::test_ttl_many_columns -test_ttl_replicated/test.py::test_modify_column_ttl -test_ttl_replicated/test.py::test_merge_with_ttl_timeout -test_ttl_replicated/test.py::test_ttl_empty_parts 
-test_ttl_replicated/test.py::test_ttl_compatibility[node_left1-node_right1-1] -test_version_update/test.py::test_aggregate_function_versioning_persisting_metadata -test_version_update/test.py::test_aggregate_function_versioning_issue_16587 -test_ttl_replicated/test.py::test_modify_ttl test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database test_profile_events_s3/test.py::test_profile_events -test_version_update_after_mutation/test.py::test_upgrade_while_mutation -test_version_update_after_mutation/test.py::test_mutate_and_upgrade -test_system_flush_logs/test.py::test_system_logs[system.text_log-0] test_user_defined_object_persistence/test.py::test_persistence test_settings_profile/test.py::test_show_profiles test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster test_select_access_rights/test_main.py::test_alias_columns test_select_access_rights/test_main.py::test_select_count test_select_access_rights/test_main.py::test_select_join -test_replicated_merge_tree_compatibility/test.py::test_replicated_merge_tree_defaults_compatibility test_postgresql_protocol/test.py::test_python_client -test_quota/test.py::test_add_remove_interval -test_quota/test.py::test_add_remove_quota -test_quota/test.py::test_consumption_of_show_clusters -test_quota/test.py::test_consumption_of_show_databases -test_quota/test.py::test_consumption_of_show_privileges -test_quota/test.py::test_consumption_of_show_processlist -test_quota/test.py::test_consumption_of_show_tables -test_quota/test.py::test_dcl_introspection -test_quota/test.py::test_dcl_management -test_quota/test.py::test_exceed_quota -test_quota/test.py::test_query_inserts -test_quota/test.py::test_quota_from_users_xml -test_quota/test.py::test_reload_users_xml_by_timer -test_quota/test.py::test_simpliest_quota -test_quota/test.py::test_tracking_quota -test_quota/test.py::test_users_xml_is_readonly test_replicating_constants/test.py::test_different_versions test_merge_tree_s3/test.py::test_heavy_insert_select_check_memory[node] +test_wrong_db_or_table_name/test.py::test_wrong_table_name test_drop_is_lock_free/test.py::test_query_is_lock_free[detach table] -test_backward_compatibility/test_data_skipping_indices.py::test_index -test_backward_compatibility/test_convert_ordinary.py::test_convert_ordinary_to_atomic -test_backward_compatibility/test_memory_bound_aggregation.py::test_backward_compatability test_odbc_interaction/test.py::test_postgres_insert +test_zookeeper_config/test.py::test_chroot_with_different_root +test_zookeeper_config/test.py::test_chroot_with_same_root +test_merge_tree_azure_blob_storage/test.py::test_table_manipulations +test_parallel_replicas_skip_shards/test.py::test_skip_unavailable_shards diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index b746d1610a4..4419190e12c 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -2,16 +2,13 @@ 00562_in_subquery_merge_tree 00593_union_all_assert_columns_removed 00673_subquery_prepared_set_performance -00700_decimal_compare 00717_merge_and_distributed 00725_memory_tracking 00754_distributed_optimize_skip_select_on_unused_shards 00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere -00838_unique_index 00927_asof_joins 00940_order_by_read_in_order_query_plan 00945_bloom_filter_index -00979_set_index_not 00981_in_subquery_with_tuple 01049_join_low_card_bug_long 01062_pm_all_join_with_block_continuation @@ -72,7 +69,6 @@ 01925_test_storage_merge_aliases 
01930_optimize_skip_unused_shards_rewrite_in 01947_mv_subquery -01951_distributed_push_down_limit 01952_optimize_distributed_group_by_sharding_key 02000_join_on_const 02001_shard_num_shard_count @@ -82,7 +78,6 @@ 02242_join_rocksdb 02267_join_dup_columns_issue36199 02302_s3_file_pruning -02317_distinct_in_order_optimization_explain 02341_global_join_cte 02345_implicit_transaction 02352_grouby_shadows_arg @@ -92,9 +87,7 @@ 02382_join_and_filtering_set 02402_merge_engine_with_view 02404_memory_bound_merging -02421_decimal_in_precision_issue_41125 02426_orc_bug -02428_decimal_in_floating_point_literal 02428_parameterized_view 02458_use_structure_from_insertion_table 02479_race_condition_between_insert_and_droppin_mv @@ -130,6 +123,8 @@ 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long 00992_system_parts_race_condition_zookeeper_long +02818_parameterized_view_with_cte_multiple_usage 02790_optimize_skip_unused_shards_join 01940_custom_tld_sharding_key 02815_range_dict_no_direct_join +02861_join_on_nullsafe_compare diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 514aaf7e2ac..97ce5dddd19 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -4,20 +4,24 @@ import logging import subprocess import os import sys +from pathlib import Path from github import Github from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from clickhouse_helper import ( + CiLogsCredentials, + ClickHouseHelper, + prepare_tests_results_for_clickhouse, +) from commit_status_helper import ( RerunHelper, format_description, get_commit, post_commit_status, ) -from docker_pull_helper import get_image_with_version +from docker_pull_helper import DockerImage, get_image_with_version from env_helper import ( - GITHUB_RUN_URL, REPORTS_PATH, TEMP_PATH, ) @@ -26,19 +30,36 @@ from pr_info import PRInfo from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch +from tee_popen import TeePopen +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/fuzzer" -def get_run_command(pr_number, sha, download_url, workspace_path, image): +def get_run_command( + pr_info: PRInfo, + build_url: str, + workspace_path: str, + ci_logs_args: str, + image: DockerImage, +) -> str: + envs = [ + f"-e PR_TO_TEST={pr_info.number}", + f"-e SHA_TO_TEST={pr_info.sha}", + f"-e BINARY_URL_TO_DOWNLOAD='{build_url}'", + ] + + env_str = " ".join(envs) + return ( f"docker run " # For sysctl "--privileged " "--network=host " + f"{ci_logs_args}" f"--volume={workspace_path}:/workspace " + f"{env_str} " "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " - f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' f"{image}" ) @@ -79,31 +100,39 @@ def main(): build_url = url break else: - raise Exception("Cannot binary clickhouse among build results") + raise Exception("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) workspace_path = os.path.join(temp_path, "workspace") if not os.path.exists(workspace_path): os.makedirs(workspace_path) + ci_logs_credentials = CiLogsCredentials(Path(temp_path) / "export-logs-config.sh") + ci_logs_args = ci_logs_credentials.get_docker_arguments( + pr_info, stopwatch.start_time_str, check_name + ) run_command = get_run_command( - pr_info.number, pr_info.sha, build_url, 
workspace_path, docker_image + pr_info, + build_url, + workspace_path, + ci_logs_args, + docker_image, ) logging.info("Going to run %s", run_command) run_log_path = os.path.join(temp_path, "run.log") - with open(run_log_path, "w", encoding="utf-8") as log: - with subprocess.Popen( - run_command, shell=True, stderr=log, stdout=log - ) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + main_log_path = os.path.join(workspace_path, "main.log") + + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + ci_logs_credentials.clean_ci_logs_from_credentials(Path(run_log_path)) check_name_lower = ( check_name.lower().replace("(", "").replace(")", "").replace(" ", "") @@ -111,26 +140,31 @@ def main(): s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { "run.log": run_log_path, - "main.log": os.path.join(workspace_path, "main.log"), - "server.log.zst": os.path.join(workspace_path, "server.log.zst"), + "main.log": main_log_path, "fuzzer.log": os.path.join(workspace_path, "fuzzer.log"), "report.html": os.path.join(workspace_path, "report.html"), "core.zst": os.path.join(workspace_path, "core.zst"), "dmesg.log": os.path.join(workspace_path, "dmesg.log"), } + compressed_server_log_path = os.path.join(workspace_path, "server.log.zst") + if os.path.exists(compressed_server_log_path): + paths["server.log.zst"] = compressed_server_log_path + + # The script can fail before the invocation of `zstd`, but we are still interested in its log: + + not_compressed_server_log_path = os.path.join(workspace_path, "server.log") + if os.path.exists(not_compressed_server_log_path): + paths["server.log"] = not_compressed_server_log_path + s3_helper = S3Helper() for f in paths: try: - paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) + paths[f] = s3_helper.upload_test_report_to_s3(Path(paths[f]), s3_prefix + f) except Exception as ex: logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" - report_url = GITHUB_RUN_URL - if paths["report.html"]: - report_url = paths["report.html"] - # Try to get status message saved by the fuzzer try: with open( @@ -152,6 +186,19 @@ def main(): if "fail" in status: test_result.status = "FAIL" + if paths["report.html"]: + report_url = paths["report.html"] + else: + report_url = upload_results( + s3_helper, + pr_info.number, + pr_info.sha, + [test_result], + [], + check_name, + [url for url in paths.values() if url], + ) + ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/attach_gdb.lib b/tests/ci/attach_gdb.lib index e937cf6dba7..8ca1e024b99 100644 --- a/tests/ci/attach_gdb.lib +++ b/tests/ci/attach_gdb.lib @@ -42,3 +42,5 @@ quit # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" } + +# vi: ft=bash diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 35b98a7c3bb..498fd836436 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -1,20 +1,15 @@ #!/usr/bin/env python3 +from pathlib import Path +from typing import Tuple import subprocess import logging -import json import os import sys import 
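# Illustrative sketch, not code from this patch: the hunk above swaps the manual
# open() + subprocess.Popen pair for TeePopen (tests/ci/tee_popen.py). That helper
# is assumed to behave like `tee`, streaming the command's combined output both to
# a log file and to stdout; the class below only sketches that idea and may differ
# from the real implementation.
import subprocess
import sys
from pathlib import Path

class TeePopenSketch:
    """Run a shell command, duplicating its output to `log_file` and to stdout."""

    def __init__(self, command: str, log_file: Path):
        self.command = command
        self.log_file = log_file

    def __enter__(self) -> "TeePopenSketch":
        self._log = open(self.log_file, "w", encoding="utf-8")
        self.process = subprocess.Popen(
            self.command,
            shell=True,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        return self

    def wait(self) -> int:
        # Stream line by line so the CI log stays readable while the job runs
        assert self.process.stdout is not None
        for line in self.process.stdout:
            sys.stdout.write(line)
            self._log.write(line)
        return self.process.wait()

    def __exit__(self, *exc) -> None:
        self.process.wait()
        self._log.close()

# Usage mirrors the call sites above:
#     with TeePopenSketch(run_command, Path(run_log_path)) as process:
#         retcode = process.wait()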
time -from typing import List, Tuple from ci_config import CI_CONFIG, BuildConfig -from commit_status_helper import ( - NotSet, - get_commit_filtered_statuses, - get_commit, - post_commit_status, -) +from ccache_utils import CargoCache from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_JOB, @@ -24,28 +19,35 @@ from env_helper import ( S3_DOWNLOAD, TEMP_PATH, ) -from get_robot_token import get_best_robot_token -from github_helper import GitHub +from git_helper import Git, git_runner from pr_info import PRInfo +from report import BuildResult, FAILURE, StatusType, SUCCESS from s3_helper import S3Helper from tee_popen import TeePopen from version_helper import ( ClickHouseVersion, - Git, get_version_from_repo, update_version_local, ) +from clickhouse_helper import ( + ClickHouseHelper, + CiLogsCredentials, + prepare_tests_results_for_clickhouse, + get_instance_type, + get_instance_id, +) +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" def _can_export_binaries(build_config: BuildConfig) -> bool: - if build_config["package_type"] != "deb": + if build_config.package_type != "deb": return False - if build_config["sanitizer"] != "": + if build_config.sanitizer != "": return True - if build_config["build_type"] != "": + if build_config.debug_build: return True return False @@ -53,34 +55,37 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: def get_packager_cmd( build_config: BuildConfig, packager_path: str, - output_path: str, + output_path: Path, + cargo_cache_dir: Path, build_version: str, image_version: str, official: bool, ) -> str: - package_type = build_config["package_type"] - comp = build_config["compiler"] + package_type = build_config.package_type + comp = build_config.compiler cmake_flags = "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=1" cmd = ( - f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager --output-dir={output_path} " - f"--package-type={package_type} --compiler={comp}" + f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager " + f"--output-dir={output_path} --package-type={package_type} --compiler={comp}" ) - if build_config["build_type"]: - cmd += f" --build-type={build_config['build_type']}" - if build_config["sanitizer"]: - cmd += f" --sanitizer={build_config['sanitizer']}" - if build_config["tidy"] == "enable": + if build_config.debug_build: + cmd += " --debug-build" + if build_config.sanitizer: + cmd += f" --sanitizer={build_config.sanitizer}" + if build_config.tidy: cmd += " --clang-tidy" cmd += " --cache=sccache" cmd += " --s3-rw-access" cmd += f" --s3-bucket={S3_BUILDS_BUCKET}" + cmd += f" --cargo-cache-dir={cargo_cache_dir}" - if "additional_pkgs" in build_config and build_config["additional_pkgs"]: + if build_config.additional_pkgs: cmd += " --additional-pkgs" cmd += f" --docker-image-version={image_version}" + cmd += " --with-profiler" cmd += f" --version={build_version}" if _can_export_binaries(build_config): @@ -93,13 +98,13 @@ def get_packager_cmd( def build_clickhouse( - packager_cmd: str, logs_path: str, build_output_path: str -) -> Tuple[str, bool]: - build_log_path = os.path.join(logs_path, BUILD_LOG_NAME) + packager_cmd: str, logs_path: Path, build_output_path: Path +) -> Tuple[Path, StatusType]: + build_log_path = logs_path / BUILD_LOG_NAME success = False with TeePopen(packager_cmd, build_log_path) as process: retcode = process.wait() - if os.path.exists(build_output_path): + if build_output_path.exists(): build_results = 
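# Illustrative sketch, not code from this patch: get_packager_cmd() above now reads
# typed attributes of the BuildConfig dataclass (introduced in tests/ci/ci_config.py
# later in this patch) instead of string-keyed dict entries. For example, the
# "package_debug" configuration now yields "--debug-build" where the old code
# emitted "--build-type=debug":
from ci_config import BuildConfig  # assumes running from tests/ci

cfg = BuildConfig(compiler="clang-16", debug_build=True, package_type="deb")

flags = [f"--package-type={cfg.package_type}", f"--compiler={cfg.compiler}"]
if cfg.debug_build:
    flags.append("--debug-build")        # was: --build-type=debug
if cfg.sanitizer:
    flags.append(f"--sanitizer={cfg.sanitizer}")
if cfg.tidy:
    flags.append("--clang-tidy")         # was: tidy == "enable"

print(" ".join(flags))  # --package-type=deb --compiler=clang-16 --debug-build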
os.listdir(build_output_path) else: build_results = [] @@ -114,15 +119,16 @@ def build_clickhouse( ) else: logging.info("Build failed") - return build_log_path, success + return build_log_path, SUCCESS if success else FAILURE def check_for_success_run( s3_helper: S3Helper, s3_prefix: str, build_name: str, - build_config: BuildConfig, + version: ClickHouseVersion, ) -> None: + # TODO: Remove after S3 artifacts # the final empty argument is necessary for distinguish build and build_suffix logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for artifacts in %s", logged_prefix) @@ -151,15 +157,16 @@ def check_for_success_run( return success = len(build_urls) > 0 - create_json_artifact( - TEMP_PATH, + build_result = BuildResult( build_name, log_url, build_urls, - build_config, + version.describe, + SUCCESS if success else FAILURE, 0, - success, + GITHUB_JOB, ) + build_result.write_json(Path(TEMP_PATH)) # Fail build job if not successeded if not success: sys.exit(1) @@ -167,36 +174,6 @@ def check_for_success_run( sys.exit(0) -def create_json_artifact( - temp_path: str, - build_name: str, - log_url: str, - build_urls: List[str], - build_config: BuildConfig, - elapsed: int, - success: bool, -) -> None: - subprocess.check_call( - f"echo 'BUILD_URLS=build_urls_{build_name}' >> $GITHUB_ENV", shell=True - ) - - result = { - "log_url": log_url, - "build_urls": build_urls, - "build_config": build_config, - "elapsed_seconds": elapsed, - "status": success, - "job_name": GITHUB_JOB, - } - - json_name = "build_urls_" + build_name + ".json" - - print(f"Dump json report {result} to {json_name} with env build_urls_{build_name}") - - with open(os.path.join(temp_path, json_name), "w", encoding="utf-8") as build_links: - json.dump(result, build_links) - - def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]: "Return prefixes for S3 artifacts paths" # FIXME performance @@ -220,10 +197,10 @@ def upload_master_static_binaries( pr_info: PRInfo, build_config: BuildConfig, s3_helper: S3Helper, - build_output_path: str, + build_output_path: Path, ) -> None: """Upload binary artifacts to a static S3 links""" - static_binary_name = build_config.get("static_binary_name", False) + static_binary_name = build_config.static_binary_name if pr_info.number != 0: return elif not static_binary_name: @@ -232,48 +209,21 @@ def upload_master_static_binaries( return s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) - binary = os.path.join(build_output_path, "clickhouse") + binary = build_output_path / "clickhouse" url = s3_helper.upload_build_file_to_s3(binary, s3_path) print(f"::notice ::Binary static URL: {url}") -def mark_failed_reports_pending(build_name: str, pr_info: PRInfo) -> None: - try: - gh = GitHub(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - statuses = get_commit_filtered_statuses(commit) - report_status = [ - name - for name, builds in CI_CONFIG["builds_report_config"].items() - if build_name in builds - ][0] - for status in statuses: - if status.context == report_status and status.state in ["failure", "error"]: - logging.info( - "Commit already have failed status for '%s', setting it to 'pending'", - report_status, - ) - post_commit_status( - commit, - "pending", - status.target_url or NotSet, - "Set to pending on rerun", - report_status, - pr_info, - ) - except: # we do not care about any exception here - logging.info("Failed to get or mark the reports status as pending, continue") - - def main(): 
logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() build_name = sys.argv[1] - build_config = CI_CONFIG["build_config"][build_name] + build_config = CI_CONFIG.build_config[build_name] - if not os.path.exists(TEMP_PATH): - os.makedirs(TEMP_PATH) + temp_path = Path(TEMP_PATH) + os.makedirs(temp_path, exist_ok=True) pr_info = PRInfo() @@ -292,10 +242,7 @@ def main(): # If this is rerun, then we try to find already created artifacts and just # put them as github actions artifact (result) - check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config) - - # If it's a latter running, we need to mark possible failed status - mark_failed_reports_pending(build_name, pr_info) + check_for_success_run(s3_helper, s3_path_prefix, build_name, version) docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME) image_version = docker_image.version @@ -303,8 +250,6 @@ def main(): logging.info("Got version from repo %s", version.string) official_flag = pr_info.number == 0 - if "official" in build_config: - official_flag = build_config["official"] version_type = "testing" if "release" in pr_info.labels or "release-lts" in pr_info.labels: @@ -317,14 +262,18 @@ def main(): logging.info("Build short name %s", build_name) - build_output_path = os.path.join(TEMP_PATH, build_name) - if not os.path.exists(build_output_path): - os.makedirs(build_output_path) + build_output_path = temp_path / build_name + os.makedirs(build_output_path, exist_ok=True) + cargo_cache = CargoCache( + temp_path / "cargo_cache" / "registry", temp_path, s3_helper + ) + cargo_cache.download() packager_cmd = get_packager_cmd( build_config, os.path.join(REPO_COPY, "docker/packager"), build_output_path, + cargo_cache.directory, version.string, image_version, official_flag, @@ -332,18 +281,21 @@ def main(): logging.info("Going to run packager with %s", packager_cmd) - logs_path = os.path.join(TEMP_PATH, "build_log") - if not os.path.exists(logs_path): - os.makedirs(logs_path) + logs_path = temp_path / "build_log" + os.makedirs(logs_path, exist_ok=True) start = time.time() - log_path, success = build_clickhouse(packager_cmd, logs_path, build_output_path) + log_path, build_status = build_clickhouse( + packager_cmd, logs_path, build_output_path + ) elapsed = int(time.time() - start) subprocess.check_call( f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True ) - logging.info("Build finished with %s, log path %s", success, log_path) - if not success: + logging.info("Build finished as %s, log path %s", build_status, log_path) + if build_status == SUCCESS: + cargo_cache.upload() + else: # We check if docker works, because if it's down, it's infrastructure try: subprocess.check_call("docker info", shell=True) @@ -355,8 +307,8 @@ def main(): # FIXME performance performance_urls = [] - performance_path = os.path.join(build_output_path, "performance.tar.zst") - if os.path.exists(performance_path): + performance_path = build_output_path / "performance.tar.zst" + if performance_path.exists(): performance_urls.append( s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) ) @@ -367,7 +319,7 @@ def main(): os.remove(performance_path) build_urls = ( - s3_helper.upload_build_folder_to_s3( + s3_helper.upload_build_directory_to_s3( build_output_path, s3_path_prefix, keep_dirs_in_s3_path=False, @@ -379,9 +331,9 @@ def main(): print("::notice ::Build URLs: {}".format("\n".join(build_urls))) - if os.path.exists(log_path): + if log_path.exists(): log_url = s3_helper.upload_build_file_to_s3( - log_path, 
s3_path_prefix + "/" + os.path.basename(log_path) + log_path, s3_path_prefix + "/" + log_path.name ) logging.info("Log url %s", log_url) else: @@ -389,13 +341,112 @@ def main(): print(f"::notice ::Log URL: {log_url}") - create_json_artifact( - TEMP_PATH, build_name, log_url, build_urls, build_config, elapsed, success + build_result = BuildResult( + build_name, + log_url, + build_urls, + version.describe, + build_status, + elapsed, + GITHUB_JOB, + ) + result_json_path = build_result.write_json(temp_path) + logging.info( + "Build result file %s is written, content:\n %s", + result_json_path, + result_json_path.read_text(encoding="utf-8"), ) upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) - # Fail build job if not successeded - if not success: + + # Upload profile data + ch_helper = ClickHouseHelper() + + ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) + if ci_logs_credentials.host: + instance_type = get_instance_type() + instance_id = get_instance_id() + query = f"""INSERT INTO build_time_trace +( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + instance_id, + file, + library, + time, + pid, + tid, + ph, + ts, + dur, + cat, + name, + detail, + count, + avgMs, + args_name +) +SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', '{instance_id}', * +FROM input(' + file String, + library String, + time DateTime64(6), + pid UInt32, + tid UInt32, + ph String, + ts UInt64, + dur UInt64, + cat String, + name String, + detail String, + count UInt64, + avgMs UInt64, + args_name String') +FORMAT JSONCompactEachRow""" + + auth = { + "X-ClickHouse-User": "ci", + "X-ClickHouse-Key": ci_logs_credentials.password, + } + url = f"https://{ci_logs_credentials.host}/" + profiles_dir = temp_path / "profiles_source" + os.makedirs(profiles_dir, exist_ok=True) + logging.info("Processing profile JSON files from {GIT_REPO_ROOT}/build_docker") + git_runner( + "./utils/prepare-time-trace/prepare-time-trace.sh " + f"build_docker {profiles_dir.absolute()}" + ) + profile_data_file = temp_path / "profile.json" + with open(profile_data_file, "wb") as profile_fd: + for profile_sourse in os.listdir(profiles_dir): + with open(profiles_dir / profile_sourse, "rb") as ps_fd: + profile_fd.write(ps_fd.read()) + + logging.info( + "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", + profile_data_file, + profile_data_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, profile_data_file) + + # Upload statistics to CI database + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [], + build_status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + log_url, + f"Build ({build_name})", + ) + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + # Fail the build job if it didn't succeed + if build_status != SUCCESS: sys.exit(1) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 47c11ee0911..f8a7afcd6bf 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -6,7 +6,7 @@ import os import sys import time from pathlib import Path -from typing import Any, Callable, List +from typing import Any, Callable, List, Union import requests # type: ignore @@ -16,6 +16,10 @@ from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 +class DownloadException(Exception): + pass + + def get_with_retries( url: str, retries: int = 
DOWNLOAD_RETRIES_COUNT, @@ -91,10 +95,10 @@ def get_gh_api( def get_build_name_for_check(check_name: str) -> str: - return CI_CONFIG["tests_config"][check_name]["required_build"] # type: ignore + return CI_CONFIG.test_configs[check_name].required_build -def read_build_urls(build_name: str, reports_path: str) -> List[str]: +def read_build_urls(build_name: str, reports_path: Union[Path, str]) -> List[str]: for root, _, files in os.walk(reports_path): for f in files: if build_name in f: @@ -149,7 +153,9 @@ def download_build_with_progress(url: str, path: Path) -> None: if os.path.exists(path): os.remove(path) else: - raise Exception(f"Cannot download dataset from {url}, all retries exceeded") + raise DownloadException( + f"Cannot download dataset from {url}, all retries exceeded" + ) if sys.stdout.isatty(): sys.stdout.write("\n") @@ -174,7 +180,7 @@ def download_builds_filter( print(urls) if not urls: - raise Exception("No build URLs found") + raise DownloadException("No build URLs found") download_builds(result_path, urls, filter_fn) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1362f3c8934..ba4d8411193 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -5,19 +5,25 @@ import logging import os import sys import atexit -from typing import Dict, List, Tuple +from pathlib import Path from github import Github from env_helper import ( GITHUB_JOB_URL, GITHUB_REPOSITORY, - GITHUB_RUN_URL, GITHUB_SERVER_URL, REPORTS_PATH, TEMP_PATH, ) -from report import create_build_html_report, BuildResult, BuildResults +from report import ( + BuildResult, + ERROR, + PENDING, + SUCCESS, + create_build_html_report, + get_worst_status, +) from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import NeedsDataType, PRInfo @@ -34,95 +40,17 @@ from ci_config import CI_CONFIG NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") -def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: - groups = { - "apk": [], - "deb": [], - "binary": [], - "tgz": [], - "rpm": [], - "performance": [], - } # type: Dict[str, List[str]] - for url in build_urls: - if url.endswith("performance.tar.zst"): - groups["performance"].append(url) - elif ( - url.endswith(".deb") - or url.endswith(".buildinfo") - or url.endswith(".changes") - or url.endswith(".tar.gz") - ): - groups["deb"].append(url) - elif url.endswith(".apk"): - groups["apk"].append(url) - elif url.endswith(".rpm"): - groups["rpm"].append(url) - elif url.endswith(".tgz") or url.endswith(".tgz.sha512"): - groups["tgz"].append(url) - else: - groups["binary"].append(url) - return groups - - -def get_failed_report( - job_name: str, -) -> Tuple[BuildResults, List[List[str]], List[str]]: - message = f"{job_name} failed" - build_result = BuildResult( - compiler="unknown", - build_type="unknown", - sanitizer="unknown", - status=message, - elapsed_seconds=0, - comment="", - ) - return [build_result], [[""]], [GITHUB_RUN_URL] - - -def process_report( - build_report: dict, -) -> Tuple[BuildResults, List[List[str]], List[str]]: - build_config = build_report["build_config"] - build_result = BuildResult( - compiler=build_config["compiler"], - build_type=build_config["build_type"], - sanitizer=build_config["sanitizer"], - status="success" if build_report["status"] else "failure", - elapsed_seconds=build_report["elapsed_seconds"], - comment=build_config["comment"], - ) - build_results = [] - build_urls = [] - build_logs_urls = [] - urls_groups = 
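# Illustrative sketch, not code from this patch: the dedicated DownloadException
# lets callers treat "artifact not found after all retries" as an expected,
# recoverable condition rather than a generic failure. CargoCache.download() in
# tests/ci/ccache_utils.py (further down in this patch) relies on exactly that to
# start with a cold cache instead of failing the whole build. The helper name
# below is made up for illustration.
from pathlib import Path

from build_download_helper import DownloadException, download_build_with_progress

def fetch_cache_or_start_cold(url: str, archive: Path, cache_dir: Path) -> bool:
    """Return True if a warm cache archive was downloaded, False for a cold start."""
    try:
        download_build_with_progress(url, archive)
        return True
    except DownloadException:
        cache_dir.mkdir(parents=True, exist_ok=True)  # proceed with an empty cache
        return False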
group_by_artifacts(build_report["build_urls"]) - found_group = False - for _, group_urls in urls_groups.items(): - if group_urls: - build_results.append(build_result) - build_urls.append(group_urls) - build_logs_urls.append(build_report["log_url"]) - found_group = True - - # No one group of urls is found, a failed report - if not found_group: - build_results.append(build_result) - build_urls.append([""]) - build_logs_urls.append(build_report["log_url"]) - - return build_results, build_urls, build_logs_urls - - -def get_build_name_from_file_name(file_name): - return file_name.replace("build_urls_", "").replace(".json", "") - - def main(): logging.basicConfig(level=logging.INFO) - temp_path = TEMP_PATH - logging.info("Reports path %s", REPORTS_PATH) + temp_path = Path(TEMP_PATH) + temp_path.mkdir(parents=True, exist_ok=True) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + logging.info("Reports path %s", REPORTS_PATH) + reports_path = Path(REPORTS_PATH) + logging.info( + "Reports found:\n %s", + "\n ".join(p.as_posix() for p in reports_path.rglob("*.json")), + ) build_check_name = sys.argv[1] needs_data = {} # type: NeedsDataType @@ -132,11 +60,11 @@ def main(): needs_data = json.load(file_handler) required_builds = len(needs_data) - if needs_data and all(i["result"] == "skipped" for i in needs_data.values()): - logging.info("All builds are skipped, exiting") - sys.exit(0) - - logging.info("The next builds are required: %s", ", ".join(needs_data)) + if needs_data: + logging.info("The next builds are required: %s", ", ".join(needs_data)) + if all(i["result"] == "skipped" for i in needs_data.values()): + logging.info("All builds are skipped, exiting") + sys.exit(0) gh = Github(get_best_robot_token(), per_page=100) pr_info = PRInfo() @@ -149,77 +77,45 @@ def main(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - builds_for_check = CI_CONFIG["builds_report_config"][build_check_name] + builds_for_check = CI_CONFIG.builds_report_config[build_check_name] required_builds = required_builds or len(builds_for_check) # Collect reports from json artifacts - builds_report_map = {} - for root, _, files in os.walk(REPORTS_PATH): - for f in files: - if f.startswith("build_urls_") and f.endswith(".json"): - logging.info("Found build report json %s", f) - build_name = get_build_name_from_file_name(f) - if build_name in builds_for_check: - with open(os.path.join(root, f), "rb") as file_handler: - builds_report_map[build_name] = json.load(file_handler) - else: - logging.info( - "Skipping report %s for build %s, it's not in our reports list", - f, - build_name, - ) + build_results = [] + for build_name in builds_for_check: + report_name = BuildResult.get_report_name(build_name).stem + build_result = BuildResult.read_json(reports_path / report_name, build_name) + if build_result.is_missing: + logging.warning("Build results for %s are missing", build_name) + continue + build_results.append(build_result) - # Sort reports by config order - build_reports = [ - builds_report_map[build_name] - for build_name in builds_for_check - if build_name in builds_report_map + # The code to collect missing reports for failed jobs + missing_job_names = [ + name + for name in needs_data + if not any(1 for build_result in build_results if build_result.job_name == name) ] - - some_builds_are_missing = len(build_reports) < required_builds - missing_build_names = [] - if some_builds_are_missing: - logging.warning( - "Expected to get %s build results, got only %s", - 
required_builds, - len(build_reports), - ) - missing_build_names = [ - name - for name in needs_data - if not any(rep for rep in build_reports if rep["job_name"] == name) - ] - else: - logging.info("Got exactly %s builds", len(builds_report_map)) - - # Group build artifacts by groups - build_results = [] # type: BuildResults - build_artifacts = [] # type: List[List[str]] - build_logs = [] # type: List[str] - - for build_report in build_reports: - _build_results, build_artifacts_url, build_logs_url = process_report( - build_report - ) + missing_builds = len(missing_job_names) + for job_name in reversed(missing_job_names): + build_result = BuildResult.missing_result("missing") + build_result.job_name = job_name + build_result.status = PENDING logging.info( - "Got %s artifact groups for build report report", len(_build_results) + "There is missing report for %s, created a dummy result %s", + job_name, + build_result, ) - build_results.extend(_build_results) - build_artifacts.extend(build_artifacts_url) - build_logs.extend(build_logs_url) + build_results.insert(0, build_result) - for failed_job in missing_build_names: - _build_results, build_artifacts_url, build_logs_url = get_failed_report( - failed_job - ) - build_results.extend(_build_results) - build_artifacts.extend(build_artifacts_url) - build_logs.extend(build_logs_url) - - total_groups = len(build_results) + # Calculate artifact groups like packages and binaries + total_groups = sum(len(br.grouped_urls) for br in build_results) + ok_groups = sum( + len(br.grouped_urls) for br in build_results if br.status == SUCCESS + ) logging.info("Totally got %s artifact groups", total_groups) if total_groups == 0: - logging.error("No success builds, failing check") + logging.error("No success builds, failing check without creating a status") sys.exit(1) s3_helper = S3Helper() @@ -234,17 +130,14 @@ def main(): report = create_build_html_report( build_check_name, build_results, - build_logs, - build_artifacts, task_url, branch_url, branch_name, commit_url, ) - report_path = os.path.join(temp_path, "report.html") - with open(report_path, "w", encoding="utf-8") as fd: - fd.write(report) + report_path = temp_path / "report.html" + report_path.write_text(report, encoding="utf-8") logging.info("Going to upload prepared report") context_name_for_path = build_check_name.lower().replace(" ", "_") @@ -259,27 +152,20 @@ def main(): print(f"::notice ::Report url: {url}") # Prepare a commit status - ok_groups = 0 - summary_status = "success" - for build_result in build_results: - if build_result.status == "failure" and summary_status != "error": - summary_status = "failure" - if build_result.status == "error" or not build_result.status: - summary_status = "error" - - if build_result.status == "success": - ok_groups += 1 + summary_status = get_worst_status(br.status for br in build_results) # Check if there are no builds at all, do not override bad status - if summary_status == "success": - if some_builds_are_missing: - summary_status = "pending" + if summary_status == SUCCESS: + if missing_builds: + summary_status = PENDING elif ok_groups == 0: - summary_status = "error" + summary_status = ERROR addition = "" - if some_builds_are_missing: - addition = f" ({len(build_reports)} of {required_builds} builds are OK)" + if missing_builds: + addition = ( + f" ({required_builds - missing_builds} of {required_builds} builds are OK)" + ) description = format_description( f"{ok_groups}/{total_groups} artifact groups are OK{addition}" @@ -289,7 +175,7 @@ def main(): commit, 
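# Illustrative sketch, not code from this patch: the hand-rolled status loop
# removed above is replaced by report.get_worst_status(). Its exact ordering lives
# in tests/ci/report.py and is not shown here; the sketch below only illustrates
# the assumed idea of picking the most severe status, with a severity ranking that
# is an assumption of this note.
from typing import Iterable

ASSUMED_SEVERITY = {"success": 0, "pending": 1, "failure": 2, "error": 3}

def worst_status_sketch(statuses: Iterable[str]) -> str:
    worst = "success"
    for status in statuses:
        if ASSUMED_SEVERITY.get(status, 0) > ASSUMED_SEVERITY[worst]:
            worst = status
    return worst

# e.g. worst_status_sketch(["success", "pending", "failure"]) == "failure"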
summary_status, url, description, build_check_name, pr_info ) - if summary_status == "error": + if summary_status == ERROR: sys.exit(1) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 250655ddeb2..8282fb7768a 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -323,7 +323,9 @@ def main(event): if action == "edited": print("PR is edited, check if the body is correct") - error, category = check_pr_description(pull_request["body"]) + error, _ = check_pr_description( + pull_request["body"], pull_request["base"]["repo"]["full_name"] + ) if error: print( f"The PR's body is wrong, is going to comment it. The error is: {error}" diff --git a/tests/ci/ccache_utils.py b/tests/ci/ccache_utils.py index e8522127132..e726bf80b92 100644 --- a/tests/ci/ccache_utils.py +++ b/tests/ci/ccache_utils.py @@ -1,71 +1,31 @@ #!/usr/bin/env python3 import logging -import time -import sys import os import shutil +from hashlib import md5 from pathlib import Path import requests # type: ignore +from build_download_helper import download_build_with_progress, DownloadException from compress_files import decompress_fast, compress_fast from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET +from git_helper import git_runner from s3_helper import S3Helper DOWNLOAD_RETRIES_COUNT = 5 -def dowload_file_with_progress(url, path): - logging.info("Downloading from %s to temp path %s", url, path) - for i in range(DOWNLOAD_RETRIES_COUNT): - try: - with open(path, "wb") as f: - response = requests.get(url, stream=True) - response.raise_for_status() - total_length = response.headers.get("content-length") - if total_length is None or int(total_length) == 0: - logging.info( - "No content-length, will download file without progress" - ) - f.write(response.content) - else: - dl = 0 - total_length = int(total_length) - logging.info("Content length is %ld bytes", total_length) - for data in response.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if sys.stdout.isatty(): - done = int(50 * dl / total_length) - percent = int(100 * float(dl) / total_length) - eq_str = "=" * done - space_str = " " * (50 - done) - sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%") - sys.stdout.flush() - break - except Exception as ex: - sys.stdout.write("\n") - time.sleep(3) - logging.info("Exception while downloading %s, retry %s", ex, i + 1) - if os.path.exists(path): - os.remove(path) - else: - raise Exception(f"Cannot download dataset from {url}, all retries exceeded") - - sys.stdout.write("\n") - logging.info("Downloading finished") - - def get_ccache_if_not_exists( - path_to_ccache_dir: str, + path_to_ccache_dir: Path, s3_helper: S3Helper, current_pr_number: int, - temp_path: str, + temp_path: Path, release_pr: int, ) -> int: """returns: number of PR for downloaded PR. 
-1 if ccache not found""" - ccache_name = os.path.basename(path_to_ccache_dir) + ccache_name = path_to_ccache_dir.name cache_found = False prs_to_check = [current_pr_number] # Release PR is either 0 or defined @@ -94,11 +54,11 @@ def get_ccache_if_not_exists( logging.info("Found ccache on path %s", obj) url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{obj}" - compressed_cache = os.path.join(temp_path, os.path.basename(obj)) - dowload_file_with_progress(url, compressed_cache) + compressed_cache = temp_path / os.path.basename(obj) + download_build_with_progress(url, compressed_cache) - path_to_decompress = str(Path(path_to_ccache_dir).parent) - if not os.path.exists(path_to_decompress): + path_to_decompress = path_to_ccache_dir.parent + if not path_to_decompress.exists(): os.makedirs(path_to_decompress) if os.path.exists(path_to_ccache_dir): @@ -122,15 +82,77 @@ def get_ccache_if_not_exists( return ccache_pr -def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path): +def upload_ccache( + path_to_ccache_dir: Path, + s3_helper: S3Helper, + current_pr_number: int, + temp_path: Path, +) -> None: logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number) - ccache_name = os.path.basename(path_to_ccache_dir) - compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.zst") + ccache_name = path_to_ccache_dir.name + compressed_cache_path = temp_path / f"{ccache_name}.tar.zst" compress_fast(path_to_ccache_dir, compressed_cache_path) - s3_path = ( - str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path) - ) + s3_path = f"{current_pr_number}/ccaches/{compressed_cache_path.name}" logging.info("Will upload %s to path %s", compressed_cache_path, s3_path) s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path) logging.info("Upload finished") + + +class CargoCache: + PREFIX = "ccache/cargo_cache" + + def __init__( + self, + directory: Path, + temp_path: Path, + s3_helper: S3Helper, + ): + self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock" + self.lock_hash = md5(self._cargo_lock_file.read_bytes()).hexdigest() + self.directory = directory + self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst" + self.temp_path = temp_path + self.s3_helper = s3_helper + self._url = ( + f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{self.PREFIX}/{self.archive_name}" + ) + self._force_upload_cache = False + + def download(self): + logging.info("Searching rust cache for Cargo.lock md5 %s", self.lock_hash) + compressed_cache = self.temp_path / self.archive_name + try: + download_build_with_progress(self._url, compressed_cache) + except DownloadException: + logging.warning("Unable downloading cargo cache, creating empty directory") + self.directory.mkdir(parents=True, exist_ok=True) + return + + # decompress the cache and check if the necessary directory is there + self.directory.parent.mkdir(parents=True, exist_ok=True) + decompress_fast(compressed_cache, self.directory.parent) + if not self.directory.exists(): + logging.warning( + "The cargo cache archive was successfully downloaded and " + "decompressed, but %s does not exitst. 
Creating empty one", + self.directory, + ) + logging.info("Cache for Cargo.lock md5 %s will be uploaded", self.lock_hash) + self.directory.mkdir(parents=True, exist_ok=True) + + def upload(self): + if not self._force_upload_cache: + cache_response = requests.head(self._url) + if cache_response.status_code == 200: + logging.info( + "Remote cargo cache %s already exist, won't reupload", self._url + ) + return + + logging.info("Compressing cargo cache") + archive_path = self.directory.parent / self.archive_name + compress_fast(self.directory, archive_path) + s3_path = f"{self.PREFIX}/{self.archive_name}" + logging.info("Uploading %s to S3 path %s", archive_path, s3_path) + self.s3_helper.upload_build_file_to_s3(archive_path, s3_path) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index db9a7f926be..b9ccc23cb2e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,193 +1,195 @@ #!/usr/bin/env python3 +import logging + from dataclasses import dataclass -from typing import Callable, Dict, TypeVar +from typing import Callable, Dict, List, Literal -ConfValue = TypeVar("ConfValue", str, bool) -BuildConfig = Dict[str, ConfValue] -CI_CONFIG = { - "build_config": { - "package_release": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "amd64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "coverity": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "coverity", - "tidy": "disable", - "with_coverage": False, - "official": False, - "comment": "A special build for coverity", - }, - "package_aarch64": { - "compiler": "clang-16-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "aarch64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_asan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "address", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_ubsan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "undefined", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_tsan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "thread", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_msan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "memory", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_debug": { - "compiler": "clang-16", - "build_type": "debug", - "sanitizer": "", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "Note: sparse checkout was used", - }, - "binary_release": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_tidy": { - "compiler": "clang-16", - "build_type": "debug", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "debug-amd64", - "tidy": "enable", - "with_coverage": False, - "comment": "clang-tidy is used for static analysis", - }, - "binary_darwin": { - "compiler": "clang-16-darwin", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - 
"binary_aarch64": { - "compiler": "clang-16-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_aarch64_v80compat": { - "compiler": "clang-16-aarch64-v80compat", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "aarch64v80compat", - "tidy": "disable", - "with_coverage": False, - "comment": "For ARMv8.1 and older", - }, - "binary_freebsd": { - "compiler": "clang-16-freebsd", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "freebsd", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_darwin_aarch64": { - "compiler": "clang-16-darwin-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos-aarch64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_ppc64le": { - "compiler": "clang-16-ppc64le", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "powerpc64le", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_amd64_compat": { - "compiler": "clang-16-amd64-compat", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "amd64compat", - "tidy": "disable", - "with_coverage": False, - "comment": "SSE2-only build", - }, - "binary_riscv64": { - "compiler": "clang-16-riscv64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "riscv64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, +@dataclass +class BuildConfig: + compiler: str + package_type: Literal["deb", "binary", "fuzzers"] + additional_pkgs: bool = False + debug_build: bool = False + sanitizer: str = "" + tidy: bool = False + comment: str = "" + static_binary_name: str = "" + + +@dataclass +class TestConfig: + required_build: str + force_tests: bool = False + + +BuildConfigs = Dict[str, BuildConfig] +BuildsReportConfig = Dict[str, List[str]] +TestConfigs = Dict[str, TestConfig] + + +@dataclass +class CiConfig: + build_config: BuildConfigs + builds_report_config: BuildsReportConfig + test_configs: TestConfigs + + def validate(self) -> None: + errors = [] + # All build configs must belong to build_report_config + for build_name in self.build_config.keys(): + build_in_reports = False + for report_config in self.builds_report_config.values(): + if build_name in report_config: + build_in_reports = True + break + if not build_in_reports: + logging.error( + "Build name %s does not belong to build reports", build_name + ) + errors.append( + f"Build name {build_name} does not belong to build reports" + ) + # And otherwise + for build_report_name, build_names in self.builds_report_config.items(): + missed_names = [ + name for name in build_names if name not in self.build_config.keys() + ] + if missed_names: + logging.error( + "The following names of the build report '%s' " + "are missed in build_config: %s", + build_report_name, + missed_names, + ) + errors.append( + f"The following names of the build report '{build_report_name}' " + f"are missed in build_config: {missed_names}", + ) + # And finally, all of tests' requirements must be in the builds + for test_name, test_config in self.test_configs.items(): + if test_config.required_build not in self.build_config.keys(): + logging.error( + "The requierment '%s' for '%s' is not found in builds", + test_config, + test_name, + ) + errors.append( + 
f"The requierment '{test_config}' for " + f"'{test_name}' is not found in builds" + ) + + if errors: + raise KeyError("config contains errors", errors) + + +CI_CONFIG = CiConfig( + build_config={ + "package_release": BuildConfig( + compiler="clang-16", + package_type="deb", + static_binary_name="amd64", + additional_pkgs=True, + ), + "package_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="deb", + static_binary_name="aarch64", + additional_pkgs=True, + ), + "package_asan": BuildConfig( + compiler="clang-16", + sanitizer="address", + package_type="deb", + ), + "package_ubsan": BuildConfig( + compiler="clang-16", + sanitizer="undefined", + package_type="deb", + ), + "package_tsan": BuildConfig( + compiler="clang-16", + sanitizer="thread", + package_type="deb", + ), + "package_msan": BuildConfig( + compiler="clang-16", + sanitizer="memory", + package_type="deb", + ), + "package_debug": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="deb", + comment="Note: sparse checkout was used", + ), + "binary_release": BuildConfig( + compiler="clang-16", + package_type="binary", + ), + "binary_tidy": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="binary", + static_binary_name="debug-amd64", + tidy=True, + comment="clang-tidy is used for static analysis", + ), + "binary_darwin": BuildConfig( + compiler="clang-16-darwin", + package_type="binary", + static_binary_name="macos", + ), + "binary_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="binary", + ), + "binary_aarch64_v80compat": BuildConfig( + compiler="clang-16-aarch64-v80compat", + package_type="binary", + static_binary_name="aarch64v80compat", + comment="For ARMv8.1 and older", + ), + "binary_freebsd": BuildConfig( + compiler="clang-16-freebsd", + package_type="binary", + static_binary_name="freebsd", + ), + "binary_darwin_aarch64": BuildConfig( + compiler="clang-16-darwin-aarch64", + package_type="binary", + static_binary_name="macos-aarch64", + ), + "binary_ppc64le": BuildConfig( + compiler="clang-16-ppc64le", + package_type="binary", + static_binary_name="powerpc64le", + ), + "binary_amd64_compat": BuildConfig( + compiler="clang-16-amd64-compat", + package_type="binary", + static_binary_name="amd64compat", + comment="SSE2-only build", + ), + "binary_riscv64": BuildConfig( + compiler="clang-16-riscv64", + package_type="binary", + static_binary_name="riscv64", + ), + "binary_s390x": BuildConfig( + compiler="clang-16-s390x", + package_type="binary", + static_binary_name="s390x", + ), + "fuzzers": BuildConfig( + compiler="clang-16", + package_type="fuzzers", + ), }, - "builds_report_config": { + builds_report_config={ "ClickHouse build check": [ "package_release", - "coverity", "package_aarch64", "package_asan", "package_ubsan", @@ -195,6 +197,7 @@ CI_CONFIG = { "package_msan", "package_debug", "binary_release", + "fuzzers", ], "ClickHouse special build check": [ "binary_tidy", @@ -205,216 +208,84 @@ CI_CONFIG = { "binary_darwin_aarch64", "binary_ppc64le", "binary_riscv64", + "binary_s390x", "binary_amd64_compat", ], }, - "tests_config": { - # required_build - build name for artifacts - # force_tests - force success status for tests - "Install packages (amd64)": { - "required_build": "package_release", - }, - "Install packages (arm64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (asan)": { - "required_build": "package_asan", - }, - "Stateful tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateful tests (msan)": { - 
"required_build": "package_msan", - }, - "Stateful tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateful tests (debug)": { - "required_build": "package_debug", - }, - "Stateful tests (release)": { - "required_build": "package_release", - }, - "Stateful tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateful tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, + test_configs={ + "Install packages (amd64)": TestConfig("package_release"), + "Install packages (arm64)": TestConfig("package_aarch64"), + "Stateful tests (asan)": TestConfig("package_asan"), + "Stateful tests (tsan)": TestConfig("package_tsan"), + "Stateful tests (msan)": TestConfig("package_msan"), + "Stateful tests (ubsan)": TestConfig("package_ubsan"), + "Stateful tests (debug)": TestConfig("package_debug"), + "Stateful tests (release)": TestConfig("package_release"), + "Stateful tests (aarch64)": TestConfig("package_aarch64"), + "Stateful tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateful tests (release, DatabaseReplicated)": TestConfig("package_release"), # Stateful tests for parallel replicas - "Stateful tests (release, ParallelReplicas)": { - "required_build": "package_release", - }, - "Stateful tests (debug, ParallelReplicas)": { - "required_build": "package_debug", - }, - "Stateful tests (asan, ParallelReplicas)": { - "required_build": "package_asan", - }, - "Stateful tests (msan, ParallelReplicas)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan, ParallelReplicas)": { - "required_build": "package_ubsan", - }, - "Stateful tests (tsan, ParallelReplicas)": { - "required_build": "package_tsan", - }, + "Stateful tests (release, ParallelReplicas)": TestConfig("package_release"), + "Stateful tests (debug, ParallelReplicas)": TestConfig("package_debug"), + "Stateful tests (asan, ParallelReplicas)": TestConfig("package_asan"), + "Stateful tests (msan, ParallelReplicas)": TestConfig("package_msan"), + "Stateful tests (ubsan, ParallelReplicas)": TestConfig("package_ubsan"), + "Stateful tests (tsan, ParallelReplicas)": TestConfig("package_tsan"), # End stateful tests for parallel replicas - "Stateless tests (asan)": { - "required_build": "package_asan", - }, - "Stateless tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateless tests (msan)": { - "required_build": "package_msan", - }, - "Stateless tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests (debug)": { - "required_build": "package_debug", - }, - "Stateless tests (release)": { - "required_build": "package_release", - }, - "Stateless tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateless tests (release, wide parts enabled)": { - "required_build": "package_release", - }, - "Stateless tests (release, analyzer)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, - "Stateless tests (release, s3 storage)": { - "required_build": "package_release", - }, - "Stateless tests (debug, s3 storage)": { - "required_build": "package_debug", - }, - "Stateless tests (tsan, s3 storage)": { - "required_build": "package_tsan", - }, - "Stress test (asan)": { - "required_build": "package_asan", - }, - "Stress test (tsan)": { - "required_build": 
"package_tsan", - }, - "Stress test (ubsan)": { - "required_build": "package_ubsan", - }, - "Stress test (msan)": { - "required_build": "package_msan", - }, - "Stress test (debug)": { - "required_build": "package_debug", - }, - "Upgrade check (asan)": { - "required_build": "package_asan", - }, - "Upgrade check (tsan)": { - "required_build": "package_tsan", - }, - "Upgrade check (msan)": { - "required_build": "package_msan", - }, - "Upgrade check (debug)": { - "required_build": "package_debug", - }, - "Integration tests (asan)": { - "required_build": "package_asan", - }, - "Integration tests (asan, analyzer)": { - "required_build": "package_asan", - }, - "Integration tests (tsan)": { - "required_build": "package_tsan", - }, - "Integration tests (release)": { - "required_build": "package_release", - }, - "Integration tests (msan)": { - "required_build": "package_msan", - }, - "Integration tests flaky check (asan)": { - "required_build": "package_asan", - }, - "Compatibility check (amd64)": { - "required_build": "package_release", - }, - "Compatibility check (aarch64)": { - "required_build": "package_aarch64", - }, - "Unit tests (release-clang)": { - "required_build": "binary_release", - }, - "Unit tests (asan)": { - "required_build": "package_asan", - }, - "Unit tests (msan)": { - "required_build": "package_msan", - }, - "Unit tests (tsan)": { - "required_build": "package_tsan", - }, - "Unit tests (ubsan)": { - "required_build": "package_ubsan", - }, - "AST fuzzer (debug)": { - "required_build": "package_debug", - }, - "AST fuzzer (asan)": { - "required_build": "package_asan", - }, - "AST fuzzer (msan)": { - "required_build": "package_msan", - }, - "AST fuzzer (tsan)": { - "required_build": "package_tsan", - }, - "AST fuzzer (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests flaky check (asan)": { - "required_build": "package_asan", - }, - "ClickHouse Keeper Jepsen": { - "required_build": "binary_release", - }, - "ClickHouse Server Jepsen": { - "required_build": "binary_release", - }, - "Performance Comparison": { - "required_build": "package_release", - "test_grep_exclude_filter": "", - }, - "Performance Comparison Aarch64": { - "required_build": "package_aarch64", - "test_grep_exclude_filter": "", - }, - "SQLancer (release)": { - "required_build": "package_release", - }, - "SQLancer (debug)": { - "required_build": "package_debug", - }, - "Sqllogic test (release)": { - "required_build": "package_release", - }, + "Stateless tests (asan)": TestConfig("package_asan"), + "Stateless tests (tsan)": TestConfig("package_tsan"), + "Stateless tests (msan)": TestConfig("package_msan"), + "Stateless tests (ubsan)": TestConfig("package_ubsan"), + "Stateless tests (debug)": TestConfig("package_debug"), + "Stateless tests (release)": TestConfig("package_release"), + "Stateless tests (aarch64)": TestConfig("package_aarch64"), + "Stateless tests (release, wide parts enabled)": TestConfig("package_release"), + "Stateless tests (release, analyzer)": TestConfig("package_release"), + "Stateless tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateless tests (release, DatabaseReplicated)": TestConfig("package_release"), + "Stateless tests (release, s3 storage)": TestConfig("package_release"), + "Stateless tests (debug, s3 storage)": TestConfig("package_debug"), + "Stateless tests (tsan, s3 storage)": TestConfig("package_tsan"), + "Stress test (asan)": TestConfig("package_asan"), + "Stress test (tsan)": TestConfig("package_tsan"), + "Stress test (ubsan)": 
TestConfig("package_ubsan"), + "Stress test (msan)": TestConfig("package_msan"), + "Stress test (debug)": TestConfig("package_debug"), + "Upgrade check (asan)": TestConfig("package_asan"), + "Upgrade check (tsan)": TestConfig("package_tsan"), + "Upgrade check (msan)": TestConfig("package_msan"), + "Upgrade check (debug)": TestConfig("package_debug"), + "Integration tests (asan)": TestConfig("package_asan"), + "Integration tests (asan, analyzer)": TestConfig("package_asan"), + "Integration tests (tsan)": TestConfig("package_tsan"), + "Integration tests (release)": TestConfig("package_release"), + "Integration tests (msan)": TestConfig("package_msan"), + "Integration tests flaky check (asan)": TestConfig("package_asan"), + "Compatibility check (amd64)": TestConfig("package_release"), + "Compatibility check (aarch64)": TestConfig("package_aarch64"), + "Unit tests (release)": TestConfig("binary_release"), + "Unit tests (asan)": TestConfig("package_asan"), + "Unit tests (msan)": TestConfig("package_msan"), + "Unit tests (tsan)": TestConfig("package_tsan"), + "Unit tests (ubsan)": TestConfig("package_ubsan"), + "AST fuzzer (debug)": TestConfig("package_debug"), + "AST fuzzer (asan)": TestConfig("package_asan"), + "AST fuzzer (msan)": TestConfig("package_msan"), + "AST fuzzer (tsan)": TestConfig("package_tsan"), + "AST fuzzer (ubsan)": TestConfig("package_ubsan"), + "Stateless tests flaky check (asan)": TestConfig("package_asan"), + "ClickHouse Keeper Jepsen": TestConfig("binary_release"), + "ClickHouse Server Jepsen": TestConfig("binary_release"), + "Performance Comparison": TestConfig("package_release"), + "Performance Comparison Aarch64": TestConfig("package_aarch64"), + "SQLancer (release)": TestConfig("package_release"), + "SQLancer (debug)": TestConfig("package_debug"), + "Sqllogic test (release)": TestConfig("package_release"), + "SQLTest": TestConfig("package_release"), }, -} # type: dict +) +CI_CONFIG.validate() + # checks required by Mergeable Check REQUIRED_CHECKS = [ @@ -424,103 +295,10 @@ REQUIRED_CHECKS = [ "Fast test", "Stateful tests (release)", "Stateless tests (release)", - "Stateless tests (debug) [1/5]", - "Stateless tests (debug) [2/5]", - "Stateless tests (debug) [3/5]", - "Stateless tests (debug) [4/5]", - "Stateless tests (debug) [5/5]", - "AST fuzzer (asan)", - "AST fuzzer (msan)", - "AST fuzzer (tsan)", - "AST fuzzer (ubsan)", - "AST fuzzer (debug)", - "Compatibility check (aarch64)", - "Compatibility check (amd64)", - "Install packages (amd64)", - "Install packages (arm64)", - "Integration tests (asan) [1/6]", - "Integration tests (asan) [2/6]", - "Integration tests (asan) [3/6]", - "Integration tests (asan) [4/6]", - "Integration tests (asan) [5/6]", - "Integration tests (asan) [6/6]", - "Integration tests (release) [1/4]", - "Integration tests (release) [2/4]", - "Integration tests (release) [3/4]", - "Integration tests (release) [4/4]", - "Integration tests (tsan) [1/6]", - "Integration tests (tsan) [2/6]", - "Integration tests (tsan) [3/6]", - "Integration tests (tsan) [4/6]", - "Integration tests (tsan) [5/6]", - "Integration tests (tsan) [6/6]", - "Integration tests flaky check (asan)", - "Stateful tests (aarch64)", - "Stateful tests (asan)", - "Stateful tests (asan, ParallelReplicas)", - "Stateful tests (debug)", - "Stateful tests (debug, ParallelReplicas)", - "Stateful tests (msan)", - "Stateful tests (msan, ParallelReplicas)", - "Stateful tests (release, ParallelReplicas)", - "Stateful tests (tsan)", - "Stateful tests (tsan, ParallelReplicas)", - "Stateful 
tests (ubsan)", - "Stateful tests (ubsan, ParallelReplicas)", - "Stateless tests (aarch64)", - "Stateless tests (asan) [1/4]", - "Stateless tests (asan) [2/4]", - "Stateless tests (asan) [3/4]", - "Stateless tests (asan) [4/4]", - "Stateless tests (debug) [1/5]", - "Stateless tests (debug) [2/5]", - "Stateless tests (debug) [3/5]", - "Stateless tests (debug) [4/5]", - "Stateless tests (debug) [5/5]", - "Stateless tests (debug, s3 storage) [1/6]", - "Stateless tests (debug, s3 storage) [2/6]", - "Stateless tests (debug, s3 storage) [3/6]", - "Stateless tests (debug, s3 storage) [4/6]", - "Stateless tests (debug, s3 storage) [5/6]", - "Stateless tests (debug, s3 storage) [6/6]", - "Stateless tests (msan) [1/6]", - "Stateless tests (msan) [2/6]", - "Stateless tests (msan) [3/6]", - "Stateless tests (msan) [4/6]", - "Stateless tests (msan) [5/6]", - "Stateless tests (msan) [6/6]", - "Stateless tests (release, DatabaseReplicated) [1/4]", - "Stateless tests (release, DatabaseReplicated) [2/4]", - "Stateless tests (release, DatabaseReplicated) [3/4]", - "Stateless tests (release, DatabaseReplicated) [4/4]", - "Stateless tests (release, s3 storage) [1/2]", - "Stateless tests (release, s3 storage) [2/2]", - "Stateless tests (release, wide parts enabled)", - "Stateless tests (tsan) [1/5]", - "Stateless tests (tsan) [2/5]", - "Stateless tests (tsan) [3/5]", - "Stateless tests (tsan) [4/5]", - "Stateless tests (tsan) [5/5]", - "Stateless tests (tsan, s3 storage) [1/5]", - "Stateless tests (tsan, s3 storage) [2/5]", - "Stateless tests (tsan, s3 storage) [3/5]", - "Stateless tests (tsan, s3 storage) [4/5]", - "Stateless tests (tsan, s3 storage) [5/5]", - "Stateless tests (ubsan) [1/2]", - "Stateless tests (ubsan) [2/2]", - "Stress test (asan)", - "Stress test (debug)", - "Stress test (msan)", - "Stress test (tsan)", - "Stress test (ubsan)", - "Upgrade check (asan)", - "Upgrade check (debug)", - "Upgrade check (msan)", - "Upgrade check (tsan)", "Style Check", "Unit tests (asan)", "Unit tests (msan)", - "Unit tests (release-clang)", + "Unit tests (release)", "Unit tests (tsan)", "Unit tests (ubsan)", ] diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 9410b37d69f..1ace0ef1b24 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 -from typing import List +from pathlib import Path +from typing import Dict, List, Optional +import fileinput import json import logging import time @@ -11,35 +13,70 @@ from pr_info import PRInfo from report import TestResults +class CHException(Exception): + pass + + class InsertException(Exception): pass class ClickHouseHelper: - def __init__(self, url=None): + def __init__( + self, url: Optional[str] = None, auth: Optional[Dict[str, str]] = None + ): if url is None: url = get_parameter_from_ssm("clickhouse-test-stat-url") self.url = url - self.auth = { + self.auth = auth or { "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"), "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"), } @staticmethod - def _insert_json_str_info_impl(url, auth, db, table, json_str): + def insert_file( + url: str, + auth: Optional[Dict[str, str]], + query: str, + file: Path, + additional_options: Optional[Dict[str, str]] = None, + ) -> None: + params = { + "query": query, + "date_time_input_format": "best_effort", + "send_logs_level": "warning", + } + if additional_options: + for k, v in additional_options.items(): + params[k] = v + + with open(file, "rb") as 
data_fd: + ClickHouseHelper._insert_post( + url, params=params, data=data_fd, headers=auth + ) + + @staticmethod + def insert_json_str(url, auth, db, table, json_str): params = { "database": db, "query": f"INSERT INTO {table} FORMAT JSONEachRow", "date_time_input_format": "best_effort", "send_logs_level": "warning", } + ClickHouseHelper._insert_post(url, params=params, data=json_str, headers=auth) + + @staticmethod + def _insert_post(*args, **kwargs): + url = "" + if args: + url = args[0] + url = kwargs.get("url", url) + kwargs["timeout"] = kwargs.get("timeout", 100) for i in range(5): try: - response = requests.post( - url, params=params, data=json_str, headers=auth - ) + response = requests.post(*args, **kwargs) except Exception as e: error = f"Received exception while sending data to {url} on {i} attempt: {e}" logging.warning(error) @@ -51,17 +88,12 @@ class ClickHouseHelper: break error = ( - "Cannot insert data into clickhouse at try " - + str(i) - + ": HTTP code " - + str(response.status_code) - + ": '" - + str(response.text) - + "'" + f"Cannot insert data into clickhouse at try {i}: HTTP code " + f"{response.status_code}: '{response.text}'" ) if response.status_code >= 500: - # A retriable error + # A retryable error time.sleep(1) continue @@ -76,7 +108,7 @@ class ClickHouseHelper: raise InsertException(error) def _insert_json_str_info(self, db, table, json_str): - self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) + self.insert_json_str(self.url, self.auth, db, table, json_str) def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) @@ -103,12 +135,16 @@ class ClickHouseHelper: if not safe: raise - def _select_and_get_json_each_row(self, db, query): + def _select_and_get_json_each_row(self, db, query, query_params): params = { "database": db, "query": query, "default_format": "JSONEachRow", } + if query_params is not None: + for name, value in query_params.items(): + params[f"param_{name}"] = str(value) + for i in range(5): response = None try: @@ -116,15 +152,15 @@ class ClickHouseHelper: response.raise_for_status() return response.text except Exception as ex: - logging.warning("Cannot insert with exception %s", str(ex)) + logging.warning("Select query failed with exception %s", str(ex)) if response: - logging.warning("Reponse text %s", response.text) + logging.warning("Response text %s", response.text) time.sleep(0.1 * i) - raise Exception("Cannot fetch data from clickhouse") + raise CHException("Cannot fetch data from clickhouse") - def select_json_each_row(self, db, query): - text = self._select_and_get_json_each_row(db, query) + def select_json_each_row(self, db, query, query_params=None): + text = self._select_and_get_json_each_row(db, query, query_params) result = [] for line in text.split("\n"): if line: @@ -132,6 +168,32 @@ class ClickHouseHelper: return result +def _query_imds(path): + url = f"http://169.254.169.254/{path}" + for i in range(5): + try: + response = requests.get(url, timeout=1) + if response.status_code == 200: + return response.text + except Exception as e: + error = ( + f"Received exception while sending data to {url} on {i} attempt: {e}" + ) + logging.warning(error) + continue + return "" + + +# Obtain the machine type from IMDS: +def get_instance_type(): + return _query_imds("latest/meta-data/instance-type") + + +# Obtain the instance id from IMDS: +def get_instance_id(): + return _query_imds("latest/meta-data/instance-id") + + def prepare_tests_results_for_clickhouse( pr_info: PRInfo, test_results: 
TestResults, @@ -168,6 +230,8 @@ def prepare_tests_results_for_clickhouse( head_ref=head_ref, head_repo=head_repo, task_url=pr_info.task_url, + instance_type=get_instance_type(), + instance_id=get_instance_id(), ) # Always publish a total record for all checks. For checks with individual @@ -190,3 +254,89 @@ def prepare_tests_results_for_clickhouse( result.append(current_row) return result + + +class CiLogsCredentials: + def __init__(self, config_path: Path): + self.config_path = config_path + try: + self._host = get_parameter_from_ssm("clickhouse_ci_logs_host") # type: str + self._password = get_parameter_from_ssm( + "clickhouse_ci_logs_password" + ) # type: str + except: + logging.warning( + "Unable to retrieve host and/or password from ssm, all other " + "methods will noop" + ) + self._host = "" + self._password = "" + + def create_ci_logs_credentials(self) -> None: + if not (self.host and self.password): + logging.info( + "Hostname or password for CI logs instance are unknown, " + "skipping creating of credentials file, removing existing" + ) + self.config_path.unlink(missing_ok=True) + return + self.config_path.parent.mkdir(parents=True, exist_ok=True) + self.config_path.write_text( + f"CLICKHOUSE_CI_LOGS_HOST={self.host}\n" + "CLICKHOUSE_CI_LOGS_USER=ci\n" + f"CLICKHOUSE_CI_LOGS_PASSWORD={self.password}\n", + encoding="utf-8", + ) + + def get_docker_arguments( + self, pr_info: PRInfo, check_start_time: str, check_name: str + ) -> str: + self.create_ci_logs_credentials() + if not self.config_path.exists(): + logging.info("Do not use external logs pushing") + return "" + extra_columns = ( + f"{pr_info.number} AS pull_request_number, '{pr_info.sha}' AS commit_sha, " + f"toDateTime('{check_start_time}', 'UTC') AS check_start_time, '{check_name}' AS check_name, " + f"'{get_instance_type()}' AS instance_type, '{get_instance_id()}' AS instance_id" + ) + return ( + f'-e EXTRA_COLUMNS_EXPRESSION="{extra_columns}" ' + f"-e CLICKHOUSE_CI_LOGS_CREDENTIALS=/tmp/export-logs-config.sh " + f"--volume={self.config_path.absolute()}:/tmp/export-logs-config.sh:ro " + ) + + def clean_ci_logs_from_credentials(self, log_path: Path) -> None: + if not (self.host or self.password): + logging.info( + "Hostname and password for CI logs instance are unknown, " + "skipping cleaning %s", + log_path, + ) + return + + def process_line(line: str) -> str: + if self.host and self.password: + return line.replace(self.host, "CLICKHOUSE_CI_LOGS_HOST").replace( + self.password, "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + if self.host: + return line.replace(self.host, "CLICKHOUSE_CI_LOGS_HOST") + # the remaining is self.password + return line.replace(self.password, "CLICKHOUSE_CI_LOGS_PASSWORD") + + # errors="surrogateescape" requires python 3.10.
+ # With ubuntu 22.04 we are safe + with fileinput.input( + log_path, inplace=True, errors="surrogateescape" + ) as log_fd: + for line in log_fd: + print(process_line(line), end="") + + @property + def host(self) -> str: + return self._host + + @property + def password(self) -> str: + return self._password diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index efe149b0aa4..a5fd27efb6b 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -3,7 +3,8 @@ import csv import os import time -from typing import Dict, List, Literal, Optional, Union +from typing import Dict, List, Optional, Union +from collections import defaultdict import logging from github import Github @@ -11,12 +12,22 @@ from github.GithubObject import _NotSetType, NotSet as NotSet from github.Commit import Commit from github.CommitStatus import CommitStatus from github.IssueComment import IssueComment +from github.PullRequest import PullRequest from github.Repository import Repository from ci_config import CI_CONFIG, REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL -from report import TestResult, TestResults +from report import ( + ERROR, + FAILURE, + PENDING, + StatusType, + SUCCESS, + TestResult, + TestResults, + get_worst_status, +) from s3_helper import S3Helper from upload_result_helper import upload_results @@ -37,8 +48,8 @@ class RerunHelper: # currently we agree even for failed statuses for status in self.statuses: if self.check_name in status.context and status.state in ( - "success", - "failure", + SUCCESS, + FAILURE, ): return True return False @@ -51,13 +62,14 @@ class RerunHelper: def override_status(status: str, check_name: str, invert: bool = False) -> str: - if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False): - return "success" + test_config = CI_CONFIG.test_configs.get(check_name) + if test_config and test_config.force_tests: + return SUCCESS if invert: - if status == "success": - return "error" - return "success" + if status == SUCCESS: + return ERROR + return SUCCESS return status @@ -118,6 +130,27 @@ def post_commit_status( logging.error("Failed to update the status comment, continue anyway") +STATUS_ICON_MAP = defaultdict( + str, + { + ERROR: "❌", + FAILURE: "❌", + PENDING: "⏳", + SUCCESS: "✅", + }, +) + + +def update_pr_status_label(pr: PullRequest, status: str) -> None: + new_label = "pr-status-" + STATUS_ICON_MAP[status] + for label in pr.get_labels(): + if label.name == new_label: + return + if label.name.startswith("pr-status-"): + pr.remove_from_labels(label.name) + pr.add_to_labels(new_label) + + def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: """It adds or updates the comment status to all Pull Requests but for release one, so the method does nothing for simple pushes and pull requests with @@ -136,7 +169,7 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: # W/o pr_info to avoid recursion, and yes, one extra create_ci_report post_commit_status( commit, - "pending", + PENDING, create_ci_report(pr_info, statuses), "The report for running CI", CI_STATUS_NAME, @@ -157,6 +190,8 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: comment = ic break + update_pr_status_label(pr, get_worst_state(statuses)) + if comment is None: pr.create_issue_comment(comment_body) return @@ -170,33 +205,16 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> 
None: def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: """The method generates the comment body, as well it updates the CI report""" - def beauty_state(state: str) -> str: - if state == "success": - return f"🟢 {state}" - if state == "pending": - return f"🟡 {state}" - if state in ["error", "failure"]: - return f"🔴 {state}" - return state - report_url = create_ci_report(pr_info, statuses) worst_state = get_worst_state(statuses) - if not worst_state: - # Theoretically possible, although - # the function should not be used on empty statuses - worst_state = "The commit doesn't have the statuses yet" - else: - worst_state = f"The overall status of the commit is {beauty_state(worst_state)}" comment_body = ( f"\n" - f"This is an automated comment for commit {pr_info.sha} with " - f"description of existing statuses. It's updated for the latest CI running\n" - f"The full report is available [here]({report_url})\n" - f"{worst_state}\n\n" - "\n" - "" + f"*This is an automated comment for commit {pr_info.sha} with " + f"description of existing statuses. It's updated for the latest CI running*\n\n" + f"[{STATUS_ICON_MAP[worst_state]} Click here]({report_url}) to open a full report in a separate page\n" + f"\n" ) # group checks by the name to get the worst one per each grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses] @@ -220,34 +238,50 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: else: grouped_statuses[cd] = [status] - table_rows = [] # type: List[str] + table_header = ( + "
<table>\n" + "<thead><tr><th>Check name</th><th>Description</th><th>Status</th></tr></thead>\n" + "<tbody>\n" + ) + table_footer = "</tbody>\n</table>\n" + + details_header = "<details><summary>Successful checks</summary>\n" + details_footer = "</details>
\n" + + visible_table_rows = [] # type: List[str] + hidden_table_rows = [] # type: List[str] for desc, gs in grouped_statuses.items(): - table_rows.append( + state = get_worst_state(gs) + table_row = ( f"{desc.name}{desc.description}" - f"{beauty_state(get_worst_state(gs))}\n" + f"{STATUS_ICON_MAP[state]} {state}\n" ) + if state == SUCCESS: + hidden_table_rows.append(table_row) + else: + visible_table_rows.append(table_row) - table_rows.sort() + result = [comment_body] - comment_footer = "" - return "".join([comment_body, *table_rows, comment_footer]) + if hidden_table_rows: + hidden_table_rows.sort() + result.append(details_header) + result.append(table_header) + result.extend(hidden_table_rows) + result.append(table_footer) + result.append(details_footer) + + if visible_table_rows: + visible_table_rows.sort() + result.append(table_header) + result.extend(visible_table_rows) + result.append(table_footer) + + return "".join(result) def get_worst_state(statuses: CommitStatuses) -> str: - worst_status = None - states = {"error": 0, "failure": 1, "pending": 2, "success": 3} - for status in statuses: - if worst_status is None: - worst_status = status - continue - if states[status.state] < states[worst_status.state]: - worst_status = status - if worst_status.state == "error": - break - - if worst_status is None: - return "" - return worst_status.state + return get_worst_status(status.state for status in statuses) def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str: @@ -255,10 +289,15 @@ def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str: to S3 tests bucket. Then it returns the URL""" test_results = [] # type: TestResults for status in statuses: - log_urls = None + log_urls = [] if status.target_url is not None: - log_urls = [status.target_url] - test_results.append(TestResult(status.context, status.state, log_urls=log_urls)) + log_urls.append(status.target_url) + raw_logs = status.description or None + test_results.append( + TestResult( + status.context, status.state, log_urls=log_urls, raw_logs=raw_logs + ) + ) return upload_results( S3Helper(), pr_info.number, pr_info.sha, test_results, [], CI_STATUS_NAME ) @@ -323,7 +362,7 @@ def format_description(description: str) -> str: def set_mergeable_check( commit: Commit, description: str = "", - state: Literal["success", "failure"] = "success", + state: StatusType = "success", ) -> None: commit.create_status( context=MERGEABLE_NAME, @@ -362,7 +401,7 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None success = [] fail = [] for status in required_checks: - if status.state == "success": + if status.state == SUCCESS: success.append(status.context) else: fail.append(status.context) @@ -371,7 +410,7 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None description = "failed: " + ", ".join(fail) description = format_description(description) if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description, "failure") + set_mergeable_check(commit, description, FAILURE) return description = ", ".join(success) diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py index 8d52d030b84..d8e691ce3b4 100644 --- a/tests/ci/compress_files.py +++ b/tests/ci/compress_files.py @@ -1,24 +1,31 @@ #!/usr/bin/env python3 import subprocess import logging -import os + +from pathlib import Path +from typing import Optional -def compress_file_fast(path, archive_path): - if archive_path.endswith(".zst"): +PIGZ = 
Path("/usr/bin/pigz") + + +def compress_file_fast(path: Path, archive_path: Path) -> None: + if archive_path.suffix == ".zst": subprocess.check_call(f"zstd < {path} > {archive_path}", shell=True) - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): subprocess.check_call(f"pigz < {path} > {archive_path}", shell=True) else: subprocess.check_call(f"gzip < {path} > {archive_path}", shell=True) -def compress_fast(path, archive_path, exclude=None): +def compress_fast( + path: Path, archive_path: Path, exclude: Optional[Path] = None +) -> None: program_part = "" - if archive_path.endswith(".zst"): + if archive_path.suffix == ".zst": logging.info("zstd will be used for compression") program_part = "--use-compress-program='zstd --threads=0'" - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): logging.info("pigz found, will compress and decompress faster") program_part = "--use-compress-program='pigz'" else: @@ -32,27 +39,25 @@ def compress_fast(path, archive_path, exclude=None): else: exclude_part = f"--exclude {exclude}" - fname = os.path.basename(path) - if os.path.isfile(path): - path = os.path.dirname(path) - else: - path += "/.." + fname = path.name - cmd = f"tar {program_part} {exclude_part} -cf {archive_path} -C {path} {fname}" + cmd = ( + f"tar {program_part} {exclude_part} -cf {archive_path} -C {path.parent} {fname}" + ) logging.debug("compress_fast cmd: %s", cmd) subprocess.check_call(cmd, shell=True) -def decompress_fast(archive_path, result_path=None): +def decompress_fast(archive_path: Path, result_path: Optional[Path] = None) -> None: program_part = "" - if archive_path.endswith(".zst"): + if archive_path.suffix == ".zst": logging.info( "zstd will be used for decompression ('%s' -> '%s')", archive_path, result_path, ) program_part = "--use-compress-program='zstd --threads=0'" - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): logging.info( "pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index fff2975cea4..ab20e7fb9cf 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,9 +8,8 @@ import shutil import subprocess import time import sys -from glob import glob from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, List, Optional, Set, Tuple, Union from github import Github @@ -24,24 +23,12 @@ from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import upload_results +from docker_images_helper import ImagesDict, IMAGES_FILE_PATH, get_images_dict NAME = "Push to Dockerhub" TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") -ImagesDict = Dict[str, dict] - - -# workaround for mypy issue [1]: -# -# "Argument 1 to "map" has incompatible type overloaded function" [1] -# -# [1]: https://github.com/python/mypy/issues/9864 -# -# NOTE: simply lambda will do the trick as well, but pylint will not like it -def realpath(*args, **kwargs): - return os.path.realpath(*args, **kwargs) - class DockerImage: def __init__( @@ -90,21 +77,6 @@ class DockerImage: return f"DockerImage(path={self.path},repo={self.repo},parent={self.parent})" -def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: - """Return images suppose to build on the current architecture host""" - images_dict = {} - path_to_images_file = os.path.join(repo_path, image_file_path) - if os.path.exists(path_to_images_file): 
- with open(path_to_images_file, "rb") as dict_file: - images_dict = json.load(dict_file) - else: - logging.info( - "Image file %s doesn't exist in repo %s", image_file_path, repo_path - ) - - return images_dict - - def get_changed_docker_images( pr_info: PRInfo, images_dict: ImagesDict ) -> Set[DockerImage]: @@ -123,23 +95,8 @@ def get_changed_docker_images( changed_images = [] for dockerfile_dir, image_description in images_dict.items(): - source_dir = GITHUB_WORKSPACE.rstrip("/") + "/" - dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True) - # resolve symlinks - dockerfile_files = list(map(realpath, dockerfile_files)) - # trim prefix to get relative path again, to match with files_changed - dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files)) - logging.info( - "Docker %s (source_dir=%s) build context for PR %s @ %s: %s", - dockerfile_dir, - source_dir, - pr_info.number, - pr_info.sha, - str(dockerfile_files), - ) - for f in files_changed: - if f in dockerfile_files: + if f.startswith(dockerfile_dir): name = image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( @@ -272,8 +229,6 @@ def build_and_push_one_image( cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}" cmd = ( - # tar is requried to follow symlinks, since docker-build cannot do this - f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | " "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" @@ -283,7 +238,7 @@ def build_and_push_one_image( f"{cache_from} " f"--cache-to type=inline,mode=max " f"{push_arg}" - f"--progress plain -" + f"--progress plain {image.full_path}" ) logging.info("Docker command to run: %s", cmd) with TeePopen(cmd, build_log) as proc: @@ -439,7 +394,7 @@ def main(): shutil.rmtree(TEMP_PATH) os.makedirs(TEMP_PATH) - images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json") + images_dict = get_images_dict(GITHUB_WORKSPACE, IMAGES_FILE_PATH) pr_info = PRInfo() if args.all: diff --git a/tests/ci/docker_images_helper.py b/tests/ci/docker_images_helper.py new file mode 100644 index 00000000000..6aed6e82ce3 --- /dev/null +++ b/tests/ci/docker_images_helper.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +import json +import logging +import os +from typing import Dict, List + +IMAGES_FILE_PATH = "docker/images.json" + +ImagesDict = Dict[str, dict] + + +def get_images_dict(repo_path: str, images_file_path: str) -> ImagesDict: + """Return images suppose to build on the current architecture host""" + images_dict = {} + path_to_images_file = os.path.join(repo_path, images_file_path) + if os.path.exists(path_to_images_file): + with open(path_to_images_file, "rb") as dict_file: + images_dict = json.load(dict_file) + else: + logging.info( + "Image file %s doesn't exist in repo %s", images_file_path, repo_path + ) + + return images_dict + + +def get_image_names(repo_path: str, images_file_path: str) -> List[str]: + images_dict = get_images_dict(repo_path, images_file_path) + return [info["name"] for (_, info) in images_dict.items()] diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index d89708b9277..56923c5e2d9 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -9,10 +9,16 @@ import subprocess from typing import List, Dict, Tuple from github import Github -from clickhouse_helper 
import ClickHouseHelper, prepare_tests_results_for_clickhouse +from clickhouse_helper import ( + ClickHouseHelper, + prepare_tests_results_for_clickhouse, + CHException, +) from commit_status_helper import format_description, get_commit, post_commit_status -from env_helper import RUNNER_TEMP +from docker_images_helper import IMAGES_FILE_PATH, get_image_names +from env_helper import RUNNER_TEMP, GITHUB_WORKSPACE from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from git_helper import Runner from pr_info import PRInfo from report import TestResults, TestResult from s3_helper import S3Helper @@ -167,6 +173,74 @@ def create_manifest(image: str, tags: List[str], push: bool) -> Tuple[str, str]: return manifest, "OK" +def enrich_images(changed_images: Dict[str, str]) -> None: + all_image_names = get_image_names(GITHUB_WORKSPACE, IMAGES_FILE_PATH) + + images_to_find_tags_for = [ + image for image in all_image_names if image not in changed_images + ] + images_to_find_tags_for.sort() + + logging.info( + "Trying to find versions for images:\n %s", "\n ".join(images_to_find_tags_for) + ) + + COMMIT_SHA_BATCH_SIZE = 100 + MAX_COMMIT_BATCHES_TO_CHECK = 10 + # Gets the sha of the last COMMIT_SHA_BATCH_SIZE commits after skipping some commits (see below) + LAST_N_ANCESTOR_SHA_COMMAND = f"git log --format=format:'%H' --max-count={COMMIT_SHA_BATCH_SIZE} --skip={{}} --merges" + git_runner = Runner() + + GET_COMMIT_SHAS_QUERY = """ + WITH {commit_shas:Array(String)} AS commit_shas, + {images:Array(String)} AS images + SELECT + substring(test_name, 1, position(test_name, ':') -1) AS image_name, + argMax(commit_sha, check_start_time) AS commit_sha + FROM checks + WHERE + check_name == 'Push multi-arch images to Dockerhub' + AND position(test_name, checks.commit_sha) + AND checks.commit_sha IN commit_shas + AND image_name IN images + GROUP BY image_name + """ + + batch_count = 0 + ch_helper = ClickHouseHelper() + + while ( + batch_count <= MAX_COMMIT_BATCHES_TO_CHECK and len(images_to_find_tags_for) != 0 + ): + commit_shas = git_runner( + LAST_N_ANCESTOR_SHA_COMMAND.format(batch_count * COMMIT_SHA_BATCH_SIZE) + ).split("\n") + + result = ch_helper.select_json_each_row( + "default", + GET_COMMIT_SHAS_QUERY, + {"commit_shas": commit_shas, "images": images_to_find_tags_for}, + ) + result.sort(key=lambda x: x["image_name"]) + + logging.info( + "Found images for commits %s..%s:\n %s", + commit_shas[0], + commit_shas[-1], + "\n ".join(f"{im['image_name']}:{im['commit_sha']}" for im in result), + ) + + for row in result: + image_name = row["image_name"] + commit_sha = row["commit_sha"] + # As we only get the SHAs of merge commits from master, the PR number will be always 0 + tag = f"0-{commit_sha}" + changed_images[image_name] = tag + images_to_find_tags_for.remove(image_name) + + batch_count += 1 + + def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -198,6 +272,12 @@ def main(): if test_result != "OK": status = "failure" + try: + # changed_images now contains all the images that are changed in this PR. Let's find the latest tag for the images that are not changed. 
+ enrich_images(changed_images) + except CHException as ex: + logging.warning("Couldn't get proper tags for not changed images: %s", ex) + with open( os.path.join(args.path, "changed_images.json"), "w", encoding="utf-8" ) as ci: diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py index 5336966b3eb..e1327f505a0 100644 --- a/tests/ci/docker_pull_helper.py +++ b/tests/ci/docker_pull_helper.py @@ -6,7 +6,8 @@ import time import subprocess import logging -from typing import List, Optional +from pathlib import Path +from typing import List, Optional, Union class DockerImage: @@ -22,7 +23,7 @@ class DockerImage: def get_images_with_versions( - reports_path: str, + reports_path: Union[Path, str], required_images: List[str], pull: bool = True, version: Optional[str] = None, @@ -80,7 +81,10 @@ def get_images_with_versions( def get_image_with_version( - reports_path: str, image: str, pull: bool = True, version: Optional[str] = None + reports_path: Union[Path, str], + image: str, + pull: bool = True, + version: Optional[str] = None, ) -> DockerImage: logging.info("Looking for images file in %s", reports_path) return get_images_with_versions(reports_path, [image], pull, version=version)[0] diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index a434d3cc841..89bd7b7755b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -238,7 +238,7 @@ def build_and_push_image( result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" - init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] + init_args = ["docker", "buildx", "build"] if push: init_args.append("--push") init_args.append("--output=type=image,push-by-digest=true") diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index c679ab984ee..ac43069a679 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -9,6 +9,7 @@ from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from report import TestResult import docker_images_check as di +from docker_images_helper import get_images_dict from version_helper import get_version_from_string import docker_server as ds @@ -18,7 +19,7 @@ import docker_server as ds class TestDockerImageCheck(unittest.TestCase): docker_images_path = os.path.join( - os.path.dirname(__file__), "tests/docker_images.json" + os.path.dirname(__file__), "tests/docker_images_for_tests.json" ) def test_get_changed_docker_images(self): @@ -31,7 +32,7 @@ class TestDockerImageCheck(unittest.TestCase): images = sorted( list( di.get_changed_docker_images( - pr_info, di.get_images_dict("/", self.docker_images_path) + pr_info, get_images_dict("/", self.docker_images_path) ) ) ) @@ -40,6 +41,12 @@ class TestDockerImageCheck(unittest.TestCase): [ di.DockerImage("docker/test/base", "clickhouse/test-base", False), di.DockerImage("docker/docs/builder", "clickhouse/docs-builder", True), + di.DockerImage( + "docker/test/sqltest", + "clickhouse/sqltest", + False, + "clickhouse/test-base", # type: ignore + ), di.DockerImage( "docker/test/stateless", "clickhouse/stateless-test", @@ -126,13 +133,12 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from 
type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --push --progress plain -", + "--cache-to type=inline,mode=max --push --progress plain path", mock_popen.call_args.args, ) self.assertTrue(result) @@ -144,13 +150,12 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain -", + "--cache-to type=inline,mode=max --progress plain path", mock_popen.call_args.args, ) self.assertTrue(result) @@ -162,12 +167,11 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain -", + "--cache-to type=inline,mode=max --progress plain path", mock_popen.call_args.args, ) self.assertFalse(result) @@ -181,14 +185,13 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " "--cache-from type=registry,ref=name:cached-version " "--cache-from type=registry,ref=name:another-cached " - "--cache-to type=inline,mode=max --progress plain -", + "--cache-to type=inline,mode=max --progress plain path", mock_popen.call_args.args, ) self.assertFalse(result) diff --git a/tests/ci/download_binary.py b/tests/ci/download_binary.py index c57780daa36..034e65f204d 100755 --- a/tests/ci/download_binary.py +++ b/tests/ci/download_binary.py @@ -9,7 +9,7 @@ import os from pathlib import Path from build_download_helper import download_build_with_progress -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI_CONFIG from env_helper import RUNNER_TEMP, S3_ARTIFACT_DOWNLOAD_TEMPLATE from git_helper import Git, commit from version_helper import get_version_from_repo, version_arg @@ -62,9 +62,9 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) for build in args.build_names: # check if it's in CI_CONFIG - config = CI_CONFIG["build_config"][build] # type: BuildConfig - if args.rename: - path = temp_path / f"clickhouse-{config['static_binary_name']}" + config = CI_CONFIG.build_config[build] + if args.rename and config.static_binary_name: + path = temp_path / f"clickhouse-{config.static_binary_name}" else: path = temp_path / "clickhouse" diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 9303f9ae293..04532ea3b96 100644 --- 
a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -1,6 +1,9 @@ +#!/usr/bin/env python + import logging import os from os import path as p +from typing import Tuple from build_download_helper import get_gh_api @@ -40,13 +43,27 @@ _GITHUB_JOB_URL = "" def GITHUB_JOB_ID() -> str: global _GITHUB_JOB_ID global _GITHUB_JOB_URL - if GITHUB_RUN_ID == "0": - _GITHUB_JOB_ID = "0" if _GITHUB_JOB_ID: return _GITHUB_JOB_ID + _GITHUB_JOB_ID, _GITHUB_JOB_URL = get_job_id_url(GITHUB_JOB) + return _GITHUB_JOB_ID + + +def GITHUB_JOB_URL() -> str: + GITHUB_JOB_ID() + return _GITHUB_JOB_URL + + +def get_job_id_url(job_name: str) -> Tuple[str, str]: + job_id = "" + job_url = "" + if GITHUB_RUN_ID == "0": + job_id = "0" + if job_id: + return job_id, job_url jobs = [] page = 1 - while not _GITHUB_JOB_ID: + while not job_id: response = get_gh_api( f"https://api.github.com/repos/{GITHUB_REPOSITORY}/" f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}" @@ -55,46 +72,41 @@ def GITHUB_JOB_ID() -> str: data = response.json() jobs.extend(data["jobs"]) for job in data["jobs"]: - if job["name"] != GITHUB_JOB: + if job["name"] != job_name: continue - _GITHUB_JOB_ID = job["id"] - _GITHUB_JOB_URL = job["html_url"] - return _GITHUB_JOB_ID + job_id = job["id"] + job_url = job["html_url"] + return job_id, job_url if ( len(jobs) >= data["total_count"] # just in case of inconsistency or len(data["jobs"]) == 0 # if we excided pages ): - _GITHUB_JOB_ID = "0" + job_id = "0" # FIXME: until it's here, we can't move to reusable workflows - if not _GITHUB_JOB_URL: + if not job_url: # This is a terrible workaround for the case of another broken part of - # GitHub actions. For nested workflows it doesn't provide a proper GITHUB_JOB + # GitHub actions. For nested workflows it doesn't provide a proper job_name # value, but only the final one. So, for `OriginalJob / NestedJob / FinalJob` - # full name, GITHUB_JOB contains only FinalJob + # full name, job_name contains only FinalJob matched_jobs = [] for job in jobs: nested_parts = job["name"].split(" / ") if len(nested_parts) <= 1: continue - if nested_parts[-1] == GITHUB_JOB: + if nested_parts[-1] == job_name: matched_jobs.append(job) if len(matched_jobs) == 1: # The best case scenario - _GITHUB_JOB_ID = matched_jobs[0]["id"] - _GITHUB_JOB_URL = matched_jobs[0]["html_url"] - return _GITHUB_JOB_ID + job_id = matched_jobs[0]["id"] + job_url = matched_jobs[0]["html_url"] + return job_id, job_url if matched_jobs: logging.error( "We could not get the ID and URL for the current job name %s, there " "are more than one jobs match it for the nested workflows. Please, " "refer to https://github.com/actions/runner/issues/2577", - GITHUB_JOB, + job_name, ) - return _GITHUB_JOB_ID - - -def GITHUB_JOB_URL() -> str: - GITHUB_JOB_ID() - return _GITHUB_JOB_URL + return job_id, job_url diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 460e17acd37..6ea56370e63 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -54,23 +54,21 @@ def get_fasttest_cmd(workspace, output_path, repo_path, pr_number, commit_sha, i ) -def process_results(result_folder: str) -> Tuple[str, str, TestResults, List[str]]: +def process_results(result_folder: Path) -> Tuple[str, str, TestResults, List[str]]: test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
# If task provides processed results, then it's responsible for content of # result_folder - if os.path.exists(result_folder): + if result_folder.exists(): test_files = [ - f - for f in os.listdir(result_folder) - if os.path.isfile(os.path.join(result_folder, f)) - ] - additional_files = [os.path.join(result_folder, f) for f in test_files] + f for f in result_folder.iterdir() if f.is_file() + ] # type: List[Path] + additional_files = [f.absolute().as_posix() for f in test_files] status = [] - status_path = os.path.join(result_folder, "check_status.tsv") - if os.path.exists(status_path): + status_path = result_folder / "check_status.tsv" + if status_path.exists(): logging.info("Found test_results.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) @@ -80,7 +78,7 @@ def process_results(result_folder: str) -> Tuple[str, str, TestResults, List[str state, description = status[0][0], status[0][1] try: - results_path = Path(result_folder) / "test_results.tsv" + results_path = result_folder / "test_results.tsv" test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -100,10 +98,9 @@ def main(): stopwatch = Stopwatch() - temp_path = TEMP_PATH + temp_path = Path(TEMP_PATH) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + temp_path.mkdir(parents=True, exist_ok=True) pr_info = PRInfo() @@ -124,17 +121,14 @@ def main(): s3_helper = S3Helper() - workspace = os.path.join(temp_path, "fasttest-workspace") - if not os.path.exists(workspace): - os.makedirs(workspace) + workspace = temp_path / "fasttest-workspace" + workspace.mkdir(parents=True, exist_ok=True) - output_path = os.path.join(temp_path, "fasttest-output") - if not os.path.exists(output_path): - os.makedirs(output_path) + output_path = temp_path / "fasttest-output" + output_path.mkdir(parents=True, exist_ok=True) - repo_path = os.path.join(temp_path, "fasttest-repo") - if not os.path.exists(repo_path): - os.makedirs(repo_path) + repo_path = temp_path / "fasttest-repo" + repo_path.mkdir(parents=True, exist_ok=True) run_cmd = get_fasttest_cmd( workspace, @@ -146,11 +140,10 @@ def main(): ) logging.info("Going to run fasttest with cmd %s", run_cmd) - logs_path = os.path.join(temp_path, "fasttest-logs") - if not os.path.exists(logs_path): - os.makedirs(logs_path) + logs_path = temp_path / "fasttest-logs" + logs_path.mkdir(parents=True, exist_ok=True) - run_log_path = os.path.join(logs_path, "run.log") + run_log_path = logs_path / "run.log" with TeePopen(run_cmd, run_log_path, timeout=90 * 60) as process: retcode = process.wait() if retcode == 0: @@ -161,9 +154,7 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) test_output_files = os.listdir(output_path) - additional_logs = [] - for f in test_output_files: - additional_logs.append(os.path.join(output_path, f)) + additional_logs = [os.path.join(output_path, f) for f in test_output_files] test_log_exists = ( "test_log.txt" in test_output_files or "test_result.txt" in test_output_files @@ -194,8 +185,8 @@ def main(): pr_info.sha, "fast_tests", ) - build_urls = s3_helper.upload_build_folder_to_s3( - os.path.join(output_path, "binaries"), + build_urls = s3_helper.upload_build_directory_to_s3( + output_path / "binaries", s3_path_prefix, keep_dirs_in_s3_path=False, upload_symlinks=False, @@ -206,7 +197,7 @@ def main(): pr_info.number, pr_info.sha, test_results, - [run_log_path] + 
additional_logs, + [run_log_path.as_posix()] + additional_logs, NAME, build_urls, ) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index aa8a0cf9553..74392947b82 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -8,6 +8,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, + update_mergeable_check, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -18,6 +19,8 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) + # Update the Mergeable Check at the final step + update_mergeable_check(gh, pr_info, CI_STATUS_NAME) commit = get_commit(gh, pr_info.sha) statuses = [ @@ -27,7 +30,8 @@ def main(): ] if not statuses: return - status = statuses[0] + # Take the latest status + status = statuses[-1] if status.state == "pending": post_commit_status( commit, diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index b773d1eddd9..1cce5821cd6 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -15,6 +15,7 @@ from github import Github from build_download_helper import download_all_deb_packages from clickhouse_helper import ( + CiLogsCredentials, ClickHouseHelper, prepare_tests_results_for_clickhouse, ) @@ -27,7 +28,7 @@ from commit_status_helper import ( post_commit_status_to_file, update_mergeable_check, ) -from docker_pull_helper import get_image_with_version +from docker_pull_helper import DockerImage, get_image_with_version from download_release_packages import download_last_release from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from get_robot_token import get_best_robot_token @@ -73,17 +74,18 @@ def get_image_name(check_name): def get_run_command( - check_name, - builds_path, - repo_tests_path, - result_path, - server_log_path, - kill_timeout, - additional_envs, - image, - flaky_check, - tests_to_run, -): + check_name: str, + builds_path: str, + repo_path: str, + result_path: str, + server_log_path: str, + kill_timeout: int, + additional_envs: List[str], + ci_logs_args: str, + image: DockerImage, + flaky_check: bool, + tests_to_run: List[str], +) -> str: additional_options = ["--hung-check"] additional_options.append("--print-time") @@ -101,27 +103,30 @@ def get_run_command( ] if flaky_check: - envs += ["-e NUM_TRIES=100", "-e MAX_RUN_TIME=1800"] + envs.append("-e NUM_TRIES=100") + envs.append("-e MAX_RUN_TIME=1800") envs += [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) volume_with_broken_test = ( - f"--volume={repo_tests_path}/analyzer_tech_debt.txt:/analyzer_tech_debt.txt" + f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " if "analyzer" in check_name else "" ) return ( f"docker run --volume={builds_path}:/package_folder " - f"--volume={repo_tests_path}:/usr/share/clickhouse-test " - f"{volume_with_broken_test} " - f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " + f"{ci_logs_args}" + f"--volume={repo_path}/tests:/usr/share/clickhouse-test " + f"{volume_with_broken_test}" + f"--volume={result_path}:/test_output " + f"--volume={server_log_path}:/var/log/clickhouse-server " f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) -def get_tests_to_run(pr_info): +def get_tests_to_run(pr_info: PRInfo) -> List[str]: result = set() if pr_info.changed_files is None: @@ -306,8 +311,6 @@ def main(): image_name = get_image_name(check_name) docker_image = 
get_image_with_version(reports_path, image_name) - repo_tests_path = os.path.join(repo_path, "tests") - packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): os.makedirs(packages_path) @@ -333,14 +336,20 @@ def main(): if validate_bugfix_check: additional_envs.append("GLOBAL_TAGS=no-random-settings") + ci_logs_credentials = CiLogsCredentials(Path(temp_path) / "export-logs-config.sh") + ci_logs_args = ci_logs_credentials.get_docker_arguments( + pr_info, stopwatch.start_time_str, check_name + ) + run_command = get_run_command( check_name, packages_path, - repo_tests_path, + repo_path, result_path, server_log_path, kill_timeout, additional_envs, + ci_logs_args, docker_image, flaky_check, tests_to_run, @@ -359,6 +368,7 @@ def main(): except subprocess.CalledProcessError: logging.warning("Failed to change files owner in %s, ignoring it", temp_path) + ci_logs_credentials.clean_ci_logs_from_credentials(Path(run_log_path)) s3_helper = S3Helper() state, description, test_results, additional_logs = process_results( diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index b41eba49cc3..530f894a36a 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import logging from dataclasses import dataclass -from typing import Optional +from typing import Any, Dict, List, Optional import boto3 # type: ignore from github import Github @@ -15,29 +15,59 @@ class Token: rest: int -def get_parameter_from_ssm(name, decrypt=True, client=None): +def get_parameter_from_ssm( + name: str, decrypt: bool = True, client: Optional[Any] = None +) -> str: if not client: client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + return client.get_parameter( # type:ignore + Name=name, WithDecryption=decrypt + )[ + "Parameter" + ]["Value"] + + +def get_parameters_from_ssm( + names: List[str], decrypt: bool = True, client: Optional[Any] = None +) -> Dict[str, str]: + if not client: + client = boto3.client("ssm", region_name="us-east-1") + + names = list(set(names)) + results = {} # type: Dict[str,str] + i = 0 + while (i) * 10 < len(names): + # the get_parameters returns up to 10 values, so the call is split by 10 + results.update( + **{ + p["Name"]: p["Value"] + for p in client.get_parameters( + Names=names[i * 10 : (i + 1) * 10], WithDecryption=decrypt + )["Parameters"] + } + ) + i += 1 + + return results ROBOT_TOKEN = None # type: Optional[Token] -def get_best_robot_token(token_prefix_env_name="github_robot_token_"): +def get_best_robot_token(tokens_path: str = "/github-tokens") -> str: global ROBOT_TOKEN if ROBOT_TOKEN is not None: return ROBOT_TOKEN.value client = boto3.client("ssm", region_name="us-east-1") - parameters = client.describe_parameters( - ParameterFilters=[ - {"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]} + tokens = { + p["Name"]: p["Value"] + for p in client.get_parameters_by_path(Path=tokens_path, WithDecryption=True)[ + "Parameters" ] - )["Parameters"] - assert parameters + } + assert tokens - for token_name in [p["Name"] for p in parameters]: - value = get_parameter_from_ssm(token_name, True, client) + for value in tokens.values(): gh = Github(value, per_page=100) # Do not spend additional request to API by accessin user.login unless # the token is chosen by the remaining requests number diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 73e1a6ef739..9971d0c236c 
100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -29,7 +29,7 @@ from docker_pull_helper import get_image_with_version, DockerImage from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResults, TestResult +from report import TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -40,18 +40,25 @@ RPM_IMAGE = "clickhouse/install-rpm-test" DEB_IMAGE = "clickhouse/install-deb-test" TEMP_PATH = Path(TEMP) LOGS_PATH = TEMP_PATH / "tests_logs" -SUCCESS = "success" -FAILURE = "failure" -OK = "OK" -FAIL = "FAIL" def prepare_test_scripts(): server_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse systemctl start clickhouse-server -clickhouse-client -q 'SELECT version()'""" +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" + initd_test = r"""#!/bin/bash +set -e +trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse +/etc/init.d/clickhouse-server start +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" keeper_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR @@ -102,6 +109,7 @@ chmod a+rw -R /tests_logs exit 1 """ (TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8") + (TEMP_PATH / "initd_test.sh").write_text(initd_test, encoding="utf-8") (TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8") (TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8") (TEMP_PATH / "preserve_logs.sh").write_text(preserve_logs, encoding="utf-8") @@ -112,6 +120,9 @@ def test_install_deb(image: DockerImage) -> TestResults: "Install server deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-{server,client,common}*deb bash -ex /packages/server_test.sh""", + "Run server init.d": r"""#!/bin/bash -ex +apt-get install /packages/clickhouse-{server,client,common}*deb +bash -ex /packages/initd_test.sh""", "Install keeper deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-keeper*deb bash -ex /packages/keeper_test.sh""", @@ -191,18 +202,18 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults: retcode = process.wait() if retcode == 0: status = OK + subprocess.check_call( + f"docker kill -s 9 {container_id}", shell=True + ) break status = FAIL copy2(log_file, LOGS_PATH) archive_path = TEMP_PATH / f"{container_name}-{retry}.tar.gz" - compress_fast( - LOGS_PATH.as_posix(), - archive_path.as_posix(), - ) + compress_fast(LOGS_PATH, archive_path) logs.append(archive_path) + subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) - subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) test_results.append(TestResult(name, status, stopwatch.duration_seconds, logs)) return test_results @@ -279,7 +290,7 @@ def main(): sys.exit(0) docker_images = { - name: get_image_with_version(REPORTS_PATH, name) + name: get_image_with_version(REPORTS_PATH, name, args.download) for name in (RPM_IMAGE, DEB_IMAGE) } prepare_test_scripts() @@ -296,6 +307,8 @@ def main(): is_match = is_match or path.endswith(".rpm") if args.tgz: is_match = 
is_match or path.endswith(".tgz") + # We don't need debug packages, so let's filter them out + is_match = is_match and "-dbg" not in path return is_match download_builds_filter( diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 222b2197117..d5b70720ee9 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -37,19 +37,20 @@ from upload_result_helper import upload_results # When update, update -# integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too +# tests/integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too IMAGES = [ + "clickhouse/dotnet-client", + "clickhouse/integration-helper", + "clickhouse/integration-test", "clickhouse/integration-tests-runner", + "clickhouse/kerberized-hadoop", + "clickhouse/kerberos-kdc", "clickhouse/mysql-golang-client", "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", "clickhouse/mysql-php-client", + "clickhouse/nginx-dav", "clickhouse/postgresql-java-client", - "clickhouse/integration-test", - "clickhouse/kerberos-kdc", - "clickhouse/kerberized-hadoop", - "clickhouse/integration-helper", - "clickhouse/dotnet-client", ] diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index c21fafa2605..54eb70536a8 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -import time +import argparse import logging import os import sys +import time -import argparse +from pathlib import Path import boto3 # type: ignore import requests # type: ignore @@ -268,8 +269,8 @@ if __name__ == "__main__": description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻" compress_fast( - os.path.join(result_path, "store"), - os.path.join(result_path, "jepsen_store.tar.zst"), + Path(result_path) / "store", + Path(result_path) / "jepsen_store.tar.zst", ) additional_data.append(os.path.join(result_path, "jepsen_store.tar.zst")) except Exception as ex: diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index c56994cc86a..aa88342fcc3 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -219,3 +219,12 @@ def list_runners(access_token: str) -> RunnerDescriptions: result.append(desc) return result + + +def cached_value_is_valid(updated_at: float, ttl: float) -> bool: + "a common function to identify if cachable value is still valid" + if updated_at == 0: + return False + if time.time() - ttl < updated_at: + return True + return False diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index ef47eacc082..82fc53b9356 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -101,7 +101,7 @@ LABELS = { CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} -def check_pr_description(pr_body: str) -> Tuple[str, str]: +def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: """The function checks the body to being properly formatted according to .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, then there is an error.""" @@ -109,11 +109,7 @@ def check_pr_description(pr_body: str) -> Tuple[str, str]: lines = [re.sub(r"\s+", " ", line) for line in lines] # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True - for line in lines - 
if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) - ]: + if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]: return "", LABELS["pr-not-for-changelog"][0] category = "" diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index d3bf15ab259..6d5653f6a58 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -8,6 +8,8 @@ import boto3 # type: ignore import jwt import requests # type: ignore +from . import cached_value_is_valid + def get_key_and_app_from_aws() -> Tuple[str, int]: secret_name = "clickhouse_github_secret_key" @@ -68,7 +70,7 @@ def get_access_token_by_key_app(private_key: str, app_id: int) -> str: @dataclass class CachedToken: - time: int + time: float value: str updating: bool = False @@ -81,12 +83,9 @@ def get_cached_access_token() -> str: return _cached_token.value # Indicate that the value is updating now, so the cached value can be # used. The first setting and close-to-ttl are not counted as update - if _cached_token.time != 0 or time.time() - 590 < _cached_token.time: - _cached_token.updating = True - else: - _cached_token.updating = False + _cached_token.updating = cached_value_is_valid(_cached_token.time, 590) private_key, app_id = get_key_and_app_from_aws() - _cached_token.time = int(time.time()) + _cached_token.time = time.time() _cached_token.value = get_access_token_by_key_app(private_key, app_id) _cached_token.updating = False return _cached_token.value diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 41ace95c350..909fd32d2b7 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import logging import sys @@ -8,6 +7,7 @@ import json import subprocess import traceback import re +from pathlib import Path from typing import Dict from github import Github @@ -20,11 +20,15 @@ from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen +from clickhouse_helper import get_instance_type, get_instance_id +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/performance-comparison" def get_run_command( + check_start_time, + check_name, workspace, result_path, repo_tests_path, @@ -33,12 +37,26 @@ def get_run_command( additional_env, image, ): + instance_type = get_instance_type() + instance_id = get_instance_id() + + envs = [ + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + f"-e INSTANCE_ID='{instance_id}'", + f"-e PR_TO_TEST={pr_to_test}", + f"-e SHA_TO_TEST={sha_to_test}", + ] + + env_str = " ".join(envs) + return ( f"docker run --privileged --volume={workspace}:/workspace " f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " - f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " + f"{env_str} {additional_env} " f"{image}" ) @@ -62,6 +80,9 @@ class RamDrive: if __name__ == "__main__": logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) repo_tests_path = os.path.join(repo_path, "tests") @@ -71,7 +92,7 @@ if 
__name__ == "__main__": reports_path = os.getenv("REPORTS_PATH", "./reports") check_name = sys.argv[1] - required_build = CI_CONFIG["tests_config"][check_name]["required_build"] + required_build = CI_CONFIG.test_configs[check_name].required_build if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -121,15 +142,6 @@ if __name__ == "__main__": ) sys.exit(0) - test_grep_exclude_filter = CI_CONFIG["tests_config"][check_name][ - "test_grep_exclude_filter" - ] - if test_grep_exclude_filter: - docker_env += f" -e CHPC_TEST_GREP_EXCLUDE={test_grep_exclude_filter}" - logging.info( - "Fill fliter our performance tests by grep -v %s", test_grep_exclude_filter - ) - rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") @@ -166,6 +178,8 @@ if __name__ == "__main__": docker_env += "".join([f" -e {name}" for name in env_extra]) run_command = get_run_command( + stopwatch.start_time_str, + check_name, result_path, result_path, repo_tests_path, @@ -177,6 +191,7 @@ if __name__ == "__main__": logging.info("Going to run command %s", run_command) run_log_path = os.path.join(temp_path, "run.log") + compare_log_path = os.path.join(result_path, "compare.log") popen_env = os.environ.copy() popen_env.update(env_extra) @@ -190,7 +205,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) paths = { - "compare.log": os.path.join(result_path, "compare.log"), + "compare.log": compare_log_path, "output.7z": os.path.join(result_path, "output.7z"), "report.html": os.path.join(result_path, "report.html"), "all-queries.html": os.path.join(result_path, "all-queries.html"), @@ -206,15 +221,17 @@ if __name__ == "__main__": uploaded = {} # type: Dict[str, str] for name, path in paths.items(): try: - uploaded[name] = s3_helper.upload_test_report_to_s3(path, s3_prefix + name) + uploaded[name] = s3_helper.upload_test_report_to_s3( + Path(path), s3_prefix + name + ) except Exception: uploaded[name] = "" traceback.print_exc() # Upload all images and flamegraphs to S3 try: - s3_helper.upload_test_folder_to_s3( - os.path.join(result_path, "images"), s3_prefix + "images" + s3_helper.upload_test_directory_to_s3( + Path(result_path) / "images", s3_prefix + "images" ) except Exception: traceback.print_exc() diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 86d4985c6b2..dee71b726df 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -279,7 +279,7 @@ class PRInfo: "user_orgs": self.user_orgs, } - def has_changes_in_documentation(self): + def has_changes_in_documentation(self) -> bool: # If the list wasn't built yet the best we can do is to # assume that there were changes. 
if self.changed_files is None or not self.changed_files: @@ -287,10 +287,9 @@ class PRInfo: for f in self.changed_files: _, ext = os.path.splitext(f) - path_in_docs = "docs" in f - path_in_website = "website" in f + path_in_docs = f.startswith("docs/") if ( - ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website) + ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs ) or "docker/docs" in f: return True return False diff --git a/tests/ci/report.py b/tests/ci/report.py index 8b301d08d56..dc31314d031 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -2,17 +2,58 @@ from ast import literal_eval from dataclasses import dataclass from pathlib import Path -from typing import List, Optional, Tuple +from typing import Dict, Final, Iterable, List, Literal, Optional, Tuple from html import escape import csv -import os import datetime +import json +import logging +import os + +from ci_config import BuildConfig, CI_CONFIG +from env_helper import get_job_id_url + + +logger = logging.getLogger(__name__) + +ERROR: Final = "error" +FAILURE: Final = "failure" +PENDING: Final = "pending" +SUCCESS: Final = "success" + +OK: Final = "OK" +FAIL: Final = "FAIL" + +StatusType = Literal["error", "failure", "pending", "success"] +# The order of statuses from the worst to the best +_STATES = {ERROR: 0, FAILURE: 1, PENDING: 2, SUCCESS: 3} + + +def get_worst_status(statuses: Iterable[str]) -> str: + worst_status = None + for status in statuses: + if _STATES.get(status) is None: + continue + if worst_status is None: + worst_status = status + continue + if _STATES.get(status) < _STATES.get(worst_status): + worst_status = status + + if worst_status == ERROR: + break + + if worst_status is None: + return "" + return worst_status + ### BEST FRONTEND PRACTICES BELOW -HTML_BASE_TEST_TEMPLATE = """ +HEAD_HTML_TEMPLATE = """ + -{title} - - -
[HTML markup of the head/footer report templates was lost in extraction; the head template keeps the {title} and {header} placeholders]
+HTML_BASE_BUILD_TEMPLATE = ( + f"{HEAD_HTML_TEMPLATE}" + """ + + @@ -457,13 +643,9 @@ tr:hover td {{filter: brightness(95%);}} {rows}
Config/job name Compiler Build type Version Sanitizer Status Build log
- - - """ + f"{FOOTER_HTML_TEMPLATE}" +) LINK_TEMPLATE = '{text}' @@ -471,64 +653,63 @@ LINK_TEMPLATE = '{text}' def create_build_html_report( header: str, build_results: BuildResults, - build_logs_urls: List[str], - artifact_urls_list: List[List[str]], task_url: str, branch_url: str, branch_name: str, commit_url: str, ) -> str: - rows = "" - for build_result, build_log_url, artifact_urls in zip( - build_results, build_logs_urls, artifact_urls_list - ): - row = "" - row += f"{build_result.compiler}" - if build_result.build_type: - row += f"{build_result.build_type}" - else: - row += "relwithdebuginfo" - if build_result.sanitizer: - row += f"{build_result.sanitizer}" - else: - row += "none" + rows = [] + for build_result in build_results: + for artifact_urls in build_result.grouped_urls: + row = [""] + row.append( + f"{build_result.build_name}
{build_result.job_link}" + ) + row.append(f"{build_result.compiler}") + if build_result.debug_build: + row.append("debug") + else: + row.append("relwithdebuginfo") + row.append(f"{build_result.version}") + if build_result.sanitizer: + row.append(f"{build_result.sanitizer}") + else: + row.append("none") - if build_result.status: - style = _get_status_style(build_result.status) - row += f'{build_result.status}' - else: - style = _get_status_style("error") - row += f'error' + if build_result.status: + style = _get_status_style(build_result.status) + row.append(f'{build_result.status}') + else: + style = _get_status_style(ERROR) + row.append(f'error') - row += f'link' + row.append(f'link') - if build_result.elapsed_seconds: - delta = datetime.timedelta(seconds=build_result.elapsed_seconds) - else: - delta = "unknown" # type: ignore + delta = "unknown" + if build_result.elapsed_seconds: + delta = str(datetime.timedelta(seconds=build_result.elapsed_seconds)) - row += f"{delta}" + row.append(f"{delta}") - links = "" - link_separator = "
" - if artifact_urls: - for artifact_url in artifact_urls: - links += LINK_TEMPLATE.format( - text=_get_html_url_name(artifact_url), url=artifact_url - ) - links += link_separator - if links: - links = links[: -len(link_separator)] - row += f"{links}" + links = [] + link_separator = "
" + if artifact_urls: + for artifact_url in artifact_urls: + links.append( + LINK_TEMPLATE.format( + text=_get_html_url_name(artifact_url), url=artifact_url + ) + ) + row.append(f"{link_separator.join(links)}") - row += f"{build_result.comment}" + row.append(f"{build_result.comment}") - row += "" - rows += row + row.append("") + rows.append("".join(row)) return HTML_BASE_BUILD_TEMPLATE.format( title=_format_header(header, branch_name), header=_format_header(header, branch_name, branch_url), - rows=rows, + rows="".join(rows), task_url=task_url, branch_name=branch_name, commit_url=commit_url, diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 330a1309016..db98a2c1ab5 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -108,7 +108,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - description_error, category = check_pr_description(pr_info.body) + description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] pr_labels_to_remove = [] if ( @@ -137,17 +137,20 @@ def main(): if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - if FEATURE_LABEL in pr_info.labels: - print(f"The '{FEATURE_LABEL}' in the labels, expect the 'Docs Check' status") + if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation(): + print( + f"The '{FEATURE_LABEL}' in the labels, " + "but there's no changed documentation" + ) post_commit_status( # do not pass pr_info here intentionally commit, - "pending", + "failure", NotSet, f"expect adding docs for {FEATURE_LABEL}", DOCS_NAME, + pr_info, ) - elif not description_error: - set_mergeable_check(commit, "skipped") + sys.exit(1) if description_error: print( @@ -173,6 +176,7 @@ def main(): ) sys.exit(1) + set_mergeable_check(commit, "skipped") ci_report_url = create_ci_report(pr_info, []) if not can_run: print("::notice ::Cannot run") diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 9ee0a431294..8cb33400c13 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -52,12 +52,14 @@ class S3Helper: self.host = S3_URL self.download_host = S3_DOWNLOAD - def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> str: + def _upload_file_to_s3( + self, bucket_name: str, file_path: Path, s3_path: str + ) -> str: logging.debug( "Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path ) metadata = {} - if os.path.getsize(file_path) < 64 * 1024 * 1024: + if file_path.stat().st_size < 64 * 1024 * 1024: if ( s3_path.endswith("txt") or s3_path.endswith("log") @@ -92,46 +94,46 @@ class S3Helper: file_path, ) else: - logging.info("No content type provied for %s", file_path) + logging.info("No content type provided for %s", file_path) else: if re.search(r"\.(txt|log|err|out)$", s3_path) or re.search( r"\.log\..*(? 
str: + def upload_test_report_to_s3(self, file_path: Path, s3_path: str) -> str: if CI: return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path) - else: - return S3Helper.copy_file_to_local( - S3_TEST_REPORTS_BUCKET, file_path, s3_path - ) - def upload_build_file_to_s3(self, file_path, s3_path): + return S3Helper.copy_file_to_local(S3_TEST_REPORTS_BUCKET, file_path, s3_path) + + def upload_build_file_to_s3(self, file_path: Path, s3_path: str) -> str: if CI: return self._upload_file_to_s3(S3_BUILDS_BUCKET, file_path, s3_path) - else: - return S3Helper.copy_file_to_local(S3_BUILDS_BUCKET, file_path, s3_path) + + return S3Helper.copy_file_to_local(S3_BUILDS_BUCKET, file_path, s3_path) def fast_parallel_upload_dir( - self, dir_path: Union[str, Path], s3_dir_path: str, bucket_name: str + self, dir_path: Path, s3_dir_path: str, bucket_name: str ) -> List[str]: all_files = [] @@ -191,37 +193,37 @@ class S3Helper: logging.basicConfig(level=original_level) return result - def _upload_folder_to_s3( + def _upload_directory_to_s3( self, - folder_path, - s3_folder_path, - bucket_name, - keep_dirs_in_s3_path, - upload_symlinks, - ): + directory_path: Path, + s3_directory_path: str, + bucket_name: str, + keep_dirs_in_s3_path: bool, + upload_symlinks: bool, + ) -> List[str]: logging.info( - "Upload folder '%s' to bucket=%s of s3 folder '%s'", - folder_path, + "Upload directory '%s' to bucket=%s of s3 directory '%s'", + directory_path, bucket_name, - s3_folder_path, + s3_directory_path, ) - if not os.path.exists(folder_path): + if not directory_path.exists(): return [] - files = os.listdir(folder_path) + files = list(directory_path.iterdir()) if not files: return [] p = Pool(min(len(files), 5)) - def task(file_name): - full_fs_path = os.path.join(folder_path, file_name) + def task(file_path: Path) -> Union[str, List[str]]: + full_fs_path = file_path.absolute() if keep_dirs_in_s3_path: - full_s3_path = s3_folder_path + "/" + os.path.basename(folder_path) + full_s3_path = os.path.join(s3_directory_path, directory_path.name) else: - full_s3_path = s3_folder_path + full_s3_path = s3_directory_path if os.path.isdir(full_fs_path): - return self._upload_folder_to_s3( + return self._upload_directory_to_s3( full_fs_path, full_s3_path, bucket_name, @@ -229,60 +231,63 @@ class S3Helper: upload_symlinks, ) - if os.path.islink(full_fs_path): + if full_fs_path.is_symlink(): if upload_symlinks: if CI: return self._upload_file_to_s3( - bucket_name, full_fs_path, full_s3_path + "/" + file_name - ) - else: - return S3Helper.copy_file_to_local( - bucket_name, full_fs_path, full_s3_path + "/" + file_name + bucket_name, + full_fs_path, + full_s3_path + "/" + file_path.name, ) + return S3Helper.copy_file_to_local( + bucket_name, full_fs_path, full_s3_path + "/" + file_path.name + ) return [] if CI: return self._upload_file_to_s3( - bucket_name, full_fs_path, full_s3_path + "/" + file_name - ) - else: - return S3Helper.copy_file_to_local( - bucket_name, full_fs_path, full_s3_path + "/" + file_name + bucket_name, full_fs_path, full_s3_path + "/" + file_path.name ) + return S3Helper.copy_file_to_local( + bucket_name, full_fs_path, full_s3_path + "/" + file_path.name + ) + return sorted(_flatten_list(list(p.map(task, files)))) - def upload_build_folder_to_s3( + def upload_build_directory_to_s3( self, - folder_path, - s3_folder_path, - keep_dirs_in_s3_path=True, - upload_symlinks=True, - ): - return self._upload_folder_to_s3( - folder_path, - s3_folder_path, + directory_path: Path, + s3_directory_path: str, + 
keep_dirs_in_s3_path: bool = True, + upload_symlinks: bool = True, + ) -> List[str]: + return self._upload_directory_to_s3( + directory_path, + s3_directory_path, S3_BUILDS_BUCKET, keep_dirs_in_s3_path, upload_symlinks, ) - def upload_test_folder_to_s3( + def upload_test_directory_to_s3( self, - folder_path, - s3_folder_path, - keep_dirs_in_s3_path=True, - upload_symlinks=True, - ): - return self._upload_folder_to_s3( - folder_path, - s3_folder_path, + directory_path: Path, + s3_directory_path: str, + keep_dirs_in_s3_path: bool = True, + upload_symlinks: bool = True, + ) -> List[str]: + return self._upload_directory_to_s3( + directory_path, + s3_directory_path, S3_TEST_REPORTS_BUCKET, keep_dirs_in_s3_path, upload_symlinks, ) - def list_prefix(self, s3_prefix_path, bucket=S3_BUILDS_BUCKET): + def list_prefix( + self, s3_prefix_path: str, bucket: str = S3_BUILDS_BUCKET + ) -> List[str]: objects = self.client.list_objects_v2(Bucket=bucket, Prefix=s3_prefix_path) result = [] if "Contents" in objects: @@ -291,7 +296,7 @@ class S3Helper: return result - def exists(self, key, bucket=S3_BUILDS_BUCKET): + def exists(self, key: str, bucket: str = S3_BUILDS_BUCKET) -> bool: try: self.client.head_object(Bucket=bucket, Key=key) return True @@ -299,13 +304,12 @@ class S3Helper: return False @staticmethod - def copy_file_to_local(bucket_name: str, file_path: str, s3_path: str) -> str: - local_path = os.path.abspath( - os.path.join(RUNNER_TEMP, "s3", bucket_name, s3_path) - ) - local_dir = os.path.dirname(local_path) - if not os.path.exists(local_dir): - os.makedirs(local_dir) + def copy_file_to_local(bucket_name: str, file_path: Path, s3_path: str) -> str: + local_path = ( + Path(RUNNER_TEMP) / "s3" / os.path.join(bucket_name, s3_path) + ).absolute() + local_dir = local_path.parent + local_dir.mkdir(parents=True, exist_ok=True) shutil.copy(file_path, local_path) logging.info("Copied %s to %s", file_path, local_path) diff --git a/utils/ci-slack-bot/ci-slack-bot.py b/tests/ci/slack_bot_ci_lambda/app.py similarity index 88% rename from utils/ci-slack-bot/ci-slack-bot.py rename to tests/ci/slack_bot_ci_lambda/app.py index ea883e3cda3..45e14138335 100755 --- a/utils/ci-slack-bot/ci-slack-bot.py +++ b/tests/ci/slack_bot_ci_lambda/app.py @@ -1,28 +1,28 @@ #!/usr/bin/env python3 -# A trivial stateless slack bot that notifies about new broken tests in ClickHouse CI. -# It checks what happened to our CI during the last check_period hours (1 hour) and notifies us in slack if necessary. -# This script should be executed once each check_period hours (1 hour). -# It will post duplicate messages if you run it more often; it will lose some messages if you run it less often. -# -# You can run it locally with no arguments, it will work in a dry-run mode. Or you can set your own SLACK_URL_DEFAULT. -# Feel free to add more checks, more details to messages, or better heuristics. -# NOTE There's no deployment automation for now, -# an AWS Lambda (slack-ci-bot-test lambda in CI-CD) has to be updated manually after changing this script. -# -# See also: https://aretestsgreenyet.com/ +""" +A trivial stateless slack bot that notifies about new broken tests in ClickHouse CI. +It checks what happened to our CI during the last check_period hours (1 hour) and + notifies us in slack if necessary. +This script should be executed once each check_period hours (1 hour). +It will post duplicate messages if you run it more often; it will lose some messages + if you run it less often. 
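The S3Helper changes above switch the file arguments to pathlib.Path and keep the CI/local split: in CI the file goes to the S3 bucket, otherwise it is copied under RUNNER_TEMP/s3/... so local runs still get a usable path back. A minimal usage sketch, assuming the tests/ci environment is available; the report file and prefix below are invented for illustration:

    from pathlib import Path
    from s3_helper import S3Helper  # module from tests/ci, as refactored above

    s3 = S3Helper()
    report = Path("/tmp/report.html")            # hypothetical local file
    prefix = "12345/deadbeef/sqltest_example"    # hypothetical S3 prefix
    # Returns an https URL in CI, or a local copy path outside CI (copy_file_to_local).
    url = s3.upload_test_report_to_s3(report, f"{prefix}/{report.name}")
    # Directories are walked recursively and uploaded via a small worker pool;
    # the return value is the sorted list of uploaded object URLs.
    urls = s3.upload_test_directory_to_s3(Path("/tmp/images"), f"{prefix}/images")
    print(url, len(urls))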
+ +You can run it locally with no arguments, it will work in a dry-run mode. + Or you can set your own SLACK_URL_DEFAULT. +Feel free to add more checks, more details to messages, or better heuristics. + +It's deployed to slack-bot-ci-lambda in CI/CD account + +See also: https://aretestsgreenyet.com/ +""" import os import json import base64 import random -if os.environ.get("AWS_LAMBDA_ENV", "0") == "1": - # For AWS labmda (python 3.7) - from botocore.vendored import requests -else: - # For running locally - import requests +import requests # type: ignore DRY_RUN_MARK = "" @@ -34,7 +34,8 @@ REPORT_NO_FAILURES_PROBABILITY = 0.99 MAX_TESTS_TO_REPORT = 4 -# Slack has a stupid limitation on message size, it splits long messages into multiple ones breaking formatting +# Slack has a stupid limitation on message size, it splits long messages into multiple, +# ones breaking formatting MESSAGE_LENGTH_LIMIT = 4000 # Find tests that failed in master during the last check_period * 24 hours, @@ -61,7 +62,7 @@ WHERE 1 AND test_name NOT IN ( SELECT test_name FROM checks WHERE 1 AND check_start_time >= now - INTERVAL 1 MONTH - AND (check_start_time + check_duration_ms / 1000) BETWEEN now - INTERVAL 2 WEEK AND now - INTERVAL extended_check_period HOUR + AND (check_start_time + check_duration_ms / 1000) BETWEEN now - INTERVAL 2 WEEK AND now - INTERVAL extended_check_period HOUR AND pull_request_number = 0 AND check_status != 'success' AND test_status LIKE 'F%') @@ -95,11 +96,11 @@ FAILED_CHECKS_PERCENTAGE_QUERY = """ SELECT if(toHour(now('Europe/Amsterdam')) = 12, v, 0) FROM ( - SELECT - countDistinctIf((commit_sha, check_name), (test_status LIKE 'F%') AND (check_status != 'success')) + SELECT + countDistinctIf((commit_sha, check_name), (test_status LIKE 'F%') AND (check_status != 'success')) / countDistinct((commit_sha, check_name)) AS v FROM checks - WHERE 1 + WHERE 1 AND (pull_request_number = 0) AND (test_status != 'SKIPPED') AND (check_start_time > (now() - toIntervalDay(1))) @@ -111,7 +112,7 @@ ALL_RECENT_FAILURES_QUERY = """ WITH '{}' AS name_substr, 90 AS interval_days, - ('Stateless tests (asan)', 'Stateless tests (address)', 'Stateless tests (address, actions)') AS backport_and_release_specific_checks + ('Stateless tests (asan)', 'Stateless tests (address)', 'Stateless tests (address, actions)', 'Integration tests (asan) [1/3]', 'Stateless tests (tsan) [1/3]') AS backport_and_release_specific_checks SELECT toStartOfDay(check_start_time) AS d, count(), @@ -315,14 +316,14 @@ def check_and_alert(): ) -def lambda_handler(event, context): +def handler(event, context): try: check_and_alert() return {"statusCode": 200, "body": "OK"} except Exception as e: send_to_slack( - "I failed, please help me (see ClickHouse/utils/ci-slack-bot/ci-slack-bot.py): " - + str(e) + "I failed, please help me " + f"(see ClickHouse/ClickHouse/tests/ci/slack_bot_ci_lambda/app.py): {e}" ) return {"statusCode": 200, "body": "FAIL"} diff --git a/tests/ci/slack_bot_ci_lambda/build_and_deploy_archive.sh b/tests/ci/slack_bot_ci_lambda/build_and_deploy_archive.sh new file mode 120000 index 00000000000..96ba3fa024e --- /dev/null +++ b/tests/ci/slack_bot_ci_lambda/build_and_deploy_archive.sh @@ -0,0 +1 @@ +../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/slack_bot_ci_lambda/requirements.txt b/tests/ci/slack_bot_ci_lambda/requirements.txt new file mode 100644 index 00000000000..098e04a9798 --- /dev/null +++ b/tests/ci/slack_bot_ci_lambda/requirements.txt @@ -0,0 +1 @@ 
+../lambda_shared_package diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py new file mode 100644 index 00000000000..a4eb1b23349 --- /dev/null +++ b/tests/ci/sqltest.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 + +import logging +import subprocess +import os +import sys +from pathlib import Path +from typing import Dict + +from github import Github + +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import ( + RerunHelper, + get_commit, + post_commit_status, +) +from docker_pull_helper import get_image_with_version +from env_helper import ( + GITHUB_RUN_URL, + REPORTS_PATH, + TEMP_PATH, +) +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResult +from s3_helper import S3Helper +from stopwatch import Stopwatch + +IMAGE_NAME = "clickhouse/sqltest" + + +def get_run_command(pr_number, sha, download_url, workspace_path, image): + return ( + f"docker run " + # For sysctl + "--privileged " + "--network=host " + f"--volume={workspace_path}:/workspace " + "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " + f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' + f"{image}" + ) + + +def main(): + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = Path(TEMP_PATH) + reports_path = Path(REPORTS_PATH) + + check_name = sys.argv[1] + + temp_path.mkdir(parents=True, exist_ok=True) + + pr_info = PRInfo() + + gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) + + rerun_helper = RerunHelper(commit, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + docker_image = get_image_with_version(reports_path, IMAGE_NAME) + + build_name = get_build_name_for_check(check_name) + print(build_name) + urls = read_build_urls(build_name, reports_path) + if not urls: + raise Exception("No build URLs found") + + for url in urls: + if url.endswith("/clickhouse"): + build_url = url + break + else: + raise Exception("Cannot find the clickhouse binary among build results") + + logging.info("Got build url %s", build_url) + + workspace_path = temp_path / "workspace" + if not os.path.exists(workspace_path): + os.makedirs(workspace_path) + + run_command = get_run_command( + pr_info.number, pr_info.sha, build_url, workspace_path, docker_image + ) + logging.info("Going to run %s", run_command) + + run_log_path = temp_path / "run.log" + with open(run_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + run_command, shell=True, stderr=log, stdout=log + ) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + check_name_lower = ( + check_name.lower().replace("(", "").replace(")", "").replace(" ", "") + ) + s3_prefix = f"{pr_info.number}/{pr_info.sha}/sqltest_{check_name_lower}/" + paths = { + "run.log": run_log_path, + "server.log.zst": workspace_path / "server.log.zst", + "server.err.log.zst": workspace_path / "server.err.log.zst", + "report.html": workspace_path / "report.html", + "test.log": workspace_path / "test.log", + } + path_urls = {} # type: Dict[str, str] + + s3_helper = S3Helper() + for f in paths: + try: + path_urls[f] = 
s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) + except Exception as ex: + logging.info("Exception uploading file %s text %s", f, ex) + path_urls[f] = "" + + report_url = GITHUB_RUN_URL + if path_urls["report.html"]: + report_url = path_urls["report.html"] + + status = "success" + description = "See the report" + test_result = TestResult(description, "OK") + + ch_helper = ClickHouseHelper() + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [test_result], + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + report_url, + check_name, + ) + + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + logging.info("Result: '%s', '%s', '%s'", status, description, report_url) + print(f"::notice ::Report url: {report_url}") + post_commit_status(commit, status, report_url, description, check_name, pr_info) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 6d17384c63f..2c566144f2c 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -125,6 +125,8 @@ def prepare_for_hung_check(drop_databases): # However, it obstruct checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") + # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb + call_with_retry("kill -CONT $(lsof -ti:9000)") # ThreadFuzzer significantly slows down server and causes false-positive hung check failures call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") @@ -200,13 +202,14 @@ def prepare_for_hung_check(drop_databases): call( make_query_command( """ - select sleepEachRow(( - select maxOrDefault(300 - elapsed) + 1 - from system.processes - where query not like '%from system.processes%' and elapsed < 300 + SELECT sleepEachRow(( + SELECT maxOrDefault(300 - elapsed) + 1 + FROM system.processes + WHERE query NOT LIKE '%FROM system.processes%' AND elapsed < 300 ) / 300) - from numbers(300) - format Null + FROM numbers(300) + FORMAT Null + SETTINGS function_sleep_max_microseconds_per_block = 0 """ ), shell=True, @@ -299,7 +302,7 @@ if __name__ == "__main__": have_long_running_queries = prepare_for_hung_check(args.drop_databases) except Exception as ex: have_long_running_queries = True - logging.error("Failed to prepare for hung check %s", str(ex)) + logging.error("Failed to prepare for hung check: %s", str(ex)) logging.info("Checking if some queries hung") cmd = " ".join( [ diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 895eb318bc4..7bac4e1d511 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -12,11 +12,12 @@ from github import Github from build_download_helper import download_all_deb_packages from clickhouse_helper import ( + CiLogsCredentials, ClickHouseHelper, prepare_tests_results_for_clickhouse, ) from commit_status_helper import RerunHelper, get_commit, post_commit_status -from docker_pull_helper import get_image_with_version +from docker_pull_helper import DockerImage, get_image_with_version from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -28,18 +29,24 @@ from upload_result_helper import upload_results def get_run_command( - build_path, result_folder, repo_tests_path, server_log_folder, image -): + build_path: str, + result_path: str, + repo_tests_path: str, + server_log_path: str, + ci_logs_args: str, + image: DockerImage, +) -> str: cmd = ( "docker 
run --cap-add=SYS_PTRACE " - # a static link, don't use S3_URL or S3_DOWNLOAD - "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " # For dmesg and sysctl "--privileged " + # a static link, don't use S3_URL or S3_DOWNLOAD + "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " + f"{ci_logs_args}" f"--volume={build_path}:/package_folder " - f"--volume={result_folder}:/test_output " + f"--volume={result_path}:/test_output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " - f"--volume={server_log_folder}:/var/log/clickhouse-server {image} " + f"--volume={server_log_path}:/var/log/clickhouse-server {image} " ) return cmd @@ -147,11 +154,20 @@ def run_stress_test(docker_image_name): os.makedirs(result_path) run_log_path = os.path.join(temp_path, "run.log") + ci_logs_credentials = CiLogsCredentials(Path(temp_path) / "export-logs-config.sh") + ci_logs_args = ci_logs_credentials.get_docker_arguments( + pr_info, stopwatch.start_time_str, check_name + ) run_command = get_run_command( - packages_path, result_path, repo_tests_path, server_log_path, docker_image + packages_path, + result_path, + repo_tests_path, + server_log_path, + ci_logs_args, + docker_image, ) - logging.info("Going to run func tests: %s", run_command) + logging.info("Going to run stress test: %s", run_command) with TeePopen(run_command, run_log_path, timeout=60 * 150) as process: retcode = process.wait() @@ -161,6 +177,7 @@ def run_stress_test(docker_image_name): logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + ci_logs_credentials.clean_ci_logs_from_credentials(Path(run_log_path)) s3_helper = S3Helper() state, description, test_results, additional_logs = process_results( diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib index 190f3f39f9e..e56369ce161 100644 --- a/tests/ci/stress_tests.lib +++ b/tests/ci/stress_tests.lib @@ -9,8 +9,6 @@ FAIL="\tFAIL\t\\N\t" FAILURE_CONTEXT_LINES=100 FAILURE_CONTEXT_MAX_LINE_WIDTH=300 -source attach_gdb.lib - function escaped() { # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. 
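The reworked get_run_command in stress_check.py above is plain string composition: the ci_logs_args fragment returned by CiLogsCredentials.get_docker_arguments (its implementation is not shown in this diff) is spliced between the fixed flags and the volume mounts, so it is expected to be either empty or to carry its own trailing space. A rough sketch of the resulting invocation, with placeholder paths and an assumed image name:

    ci_logs_args = ""  # placeholder; the real value comes from CiLogsCredentials.get_docker_arguments
    cmd = (
        "docker run --cap-add=SYS_PTRACE "
        "--privileged "
        "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' "
        f"{ci_logs_args}"
        "--volume=/tmp/packages:/package_folder "
        "--volume=/tmp/result:/test_output "
        "--volume=/repo/tests:/usr/share/clickhouse-test "
        "--volume=/tmp/server_logs:/var/log/clickhouse-server clickhouse/stress-test"  # image name assumed
    )
    print(cmd)

After the run, clean_ci_logs_from_credentials is applied to run.log, presumably so that any credentials injected through ci_logs_args never reach the uploaded logs.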
@@ -266,7 +264,7 @@ function collect_query_and_trace_logs() { for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: done } @@ -277,3 +275,5 @@ function collect_core_dumps() mv $core.zst /test_output/ done } + +# vi: ft=bash diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 02d5638cf18..a55c1bb2b3b 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -23,7 +23,7 @@ cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" lambda-package.zip - docker run --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ + docker run --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ -exc " '$PY_EXEC' -m venv '$VENV' && diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index ed198d855b9..010f7dd6734 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -9,13 +9,13 @@ from typing import Any, Dict, List import boto3 # type: ignore -from lambda_shared import RunnerDescriptions, list_runners +from lambda_shared import RunnerDescriptions, list_runners, cached_value_is_valid from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @dataclass class CachedInstances: - time: int + time: float value: dict updating: bool = False @@ -27,17 +27,12 @@ def get_cached_instances() -> dict: """return cached instances description with updating it once per five minutes""" if time.time() - 250 < cached_instances.time or cached_instances.updating: return cached_instances.value - # Indicate that the value is updating now, so the cached value can be - # used. 
The first setting and close-to-ttl are not counted as update - if cached_instances.time != 0 or time.time() - 300 < cached_instances.time: - cached_instances.updating = True - else: - cached_instances.updating = False + cached_instances.updating = cached_value_is_valid(cached_instances.time, 300) ec2_client = boto3.client("ec2") instances_response = ec2_client.describe_instances( Filters=[{"Name": "instance-state-name", "Values": ["running"]}] ) - cached_instances.time = int(time.time()) + cached_instances.time = time.time() cached_instances.value = { instance["InstanceId"]: instance for reservation in instances_response["Reservations"] @@ -47,6 +42,28 @@ def get_cached_instances() -> dict: return cached_instances.value +@dataclass +class CachedRunners: + time: float + value: RunnerDescriptions + updating: bool = False + + +cached_runners = CachedRunners(0, []) + + +def get_cached_runners(access_token: str) -> RunnerDescriptions: + """From time to time request to GH api costs up to 3 seconds, and + it's a disaster from the termination lambda perspective""" + if time.time() - 5 < cached_runners.time or cached_instances.updating: + return cached_runners.value + cached_runners.updating = cached_value_is_valid(cached_runners.time, 15) + cached_runners.value = list_runners(access_token) + cached_runners.time = time.time() + cached_runners.updating = False + return cached_runners.value + + def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: data_array = event_data["CapacityToTerminate"] to_kill_by_zone = {} # type: Dict[str, int] @@ -104,7 +121,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: ) print("Time spent on the requests to AWS: ", time.time() - start) - runners = list_runners(access_token) + runners = get_cached_runners(access_token) runner_ids = set(runner.name for runner in runners) # We used to delete potential hosts to terminate from GitHub runners pool, # but the documentation states: diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images_for_tests.json similarity index 96% rename from tests/ci/tests/docker_images.json rename to tests/ci/tests/docker_images_for_tests.json index 0d40d43c33f..70db8760561 100644 --- a/tests/ci/tests/docker_images.json +++ b/tests/ci/tests/docker_images_for_tests.json @@ -119,7 +119,8 @@ "docker/test/stateless", "docker/test/integration/base", "docker/test/fuzzer", - "docker/test/keeper-jepsen" + "docker/test/keeper-jepsen", + "docker/test/sqltest" ] }, "docker/test/integration/kerberized_hadoop": { @@ -153,5 +154,9 @@ "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] } } diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index fbb89ef8078..ef5f582dea5 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -34,7 +34,7 @@ def process_logs( test_result.log_urls.append(processed_logs[path]) elif path: url = s3_client.upload_test_report_to_s3( - path.as_posix(), s3_path_prefix + "/" + path.name + path, s3_path_prefix + "/" + path.name ) test_result.log_urls.append(url) processed_logs[path] = url @@ -44,7 +44,7 @@ def process_logs( if log_path: additional_urls.append( s3_client.upload_test_report_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + Path(log_path), s3_path_prefix + "/" + os.path.basename(log_path) ) ) @@ -100,9 +100,9 @@ def upload_results( additional_urls, statuscolors=statuscolors, ) - with 
open("report.html", "w", encoding="utf-8") as f: - f.write(html_report) + report_path = Path("report.html") + report_path.write_text(html_report, encoding="utf-8") - url = s3_client.upload_test_report_to_s3("report.html", s3_path_prefix + ".html") + url = s3_client.upload_test_report_to_s3(report_path, s3_path_prefix + ".html") logging.info("Search result in url %s", url) return url diff --git a/tests/ci/utils.lib b/tests/ci/utils.lib index b5ce4ae0d78..1204434d853 100644 --- a/tests/ci/utils.lib +++ b/tests/ci/utils.lib @@ -2,6 +2,11 @@ function run_with_retry() { + if [[ $- =~ e ]]; then + set_e=true + else + set_e=false + fi set +e local total_retries="$1" @@ -12,7 +17,9 @@ function run_with_retry() until [ "$retry" -ge "$total_retries" ] do if "$@"; then - set -e + if $set_e; then + set -e + fi return else retry=$((retry + 1)) @@ -26,4 +33,6 @@ function run_with_retry() function fn_exists() { declare -F "$1" > /dev/null; -} \ No newline at end of file +} + +# vi: ft=bash diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 15229c3d21d..fb046e989a9 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -245,8 +245,10 @@ def get_version_from_string( def get_version_from_tag(tag: str) -> ClickHouseVersion: Git.check_tag(tag) - tag = tag[1:].split("-")[0] - return get_version_from_string(tag) + tag, description = tag[1:].split("-", 1) + version = get_version_from_string(tag) + version.with_description(description) + return version def version_arg(version: str) -> ClickHouseVersion: diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 171dd743f90..d452c985407 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -12,7 +12,8 @@ echo "Running init script" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner -export RUNNER_URL="https://github.com/ClickHouse" +export RUNNER_ORG="ClickHouse" +export RUNNER_URL="https://github.com/${RUNNER_ORG}" # Funny fact, but metadata service has fixed IP INSTANCE_ID=$(ec2metadata --instance-id) export INSTANCE_ID @@ -102,7 +103,8 @@ check_proceed_spot_termination() { runner_pid=$(pgrep Runner.Listener) if [ -n "$runner_pid" ]; then # Kill the runner to not allow it cancelling the job - kill -9 "$runner_pid" + # shellcheck disable=SC2046 + kill -9 $(list_children "$runner_pid") fi sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" terminate_and_exit @@ -171,6 +173,7 @@ set -uo pipefail echo "Runner's public DNS: $(ec2metadata --public-hostname)" echo "Runner's labels: ${LABELS}" +echo "Runner's instance type: $(ec2metadata --instance-type)" EOF # Create a post-run script that will restart docker daemon before the job started @@ -234,6 +237,19 @@ is_job_assigned() { || return 1 } +list_children () { + local children + children=$(ps --ppid "$1" -o pid=) + if [ -z "$children" ]; then + return + fi + + for pid in $children; do + list_children "$pid" + done + echo "$children" +} + while true; do runner_pid=$(pgrep Runner.Listener) echo "Got runner pid '$runner_pid'" @@ -268,17 +284,11 @@ while true; do RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$runner_pid" 2>/dev/null || date +%s) )) echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" if (( 60 < RUNNER_AGE )); then - echo "Check if the instance should tear down" - if ! 
no_terminating_metadata; then - # Another check if the worker still didn't start - if is_job_assigned; then - echo "During the metadata check the job was assigned, continue" - continue - fi - kill -9 "$runner_pid" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" - terminate_on_event - fi + echo "Attempt to delete the runner for a graceful shutdown" + sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ + || continue + echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" + terminate_and_exit fi fi sleep 5 diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 5e2331ece3c..e511d773577 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -64,6 +64,7 @@ NEED_RERUN_WORKFLOWS = { "DocsCheck", "MasterCI", "NightlyBuilds", + "PublishedReleaseCI", "PullRequestCI", "ReleaseBranchCI", } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index abd109d00b2..b9e2f4ddbe6 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -383,19 +383,19 @@ def get_stacktraces_from_clickhouse(args): ) replicated_msg = ( f"{args.client} {settings_str} --query " - '"SELECT materialize((hostName(), tcpPort())) as host, thread_id, ' + '"SELECT materialize((hostName(), tcpPort())) as host, thread_name, thread_id, query_id, trace, ' "arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " "arrayMap(x -> addressToLine(x), trace), " - "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace_str " "FROM clusterAllReplicas('test_cluster_database_replicated', 'system.stack_trace') " 'ORDER BY host, thread_id FORMAT Vertical"' ) msg = ( f"{args.client} {settings_str} --query " - "\"SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " + "\"SELECT thread_name, thread_id, query_id, trace, arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " "arrayMap(x -> addressToLine(x), trace), " - "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace_str " 'FROM system.stack_trace FORMAT Vertical"' ) @@ -505,7 +505,6 @@ class FailureReason(enum.Enum): REPLICATED_DB = "replicated-database" S3_STORAGE = "s3-storage" BUILD = "not running for current build" - NO_UPGRADE_CHECK = "not running for upgrade check" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" # UNKNOWN reasons @@ -529,6 +528,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val): return gen +# To keep dependency list as short as possible, tzdata is not used here (to +# avoid try/except block for import) +def get_localzone(): + return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:])) + + class SettingsRandomizer: settings = { "max_insert_threads": lambda: 0 @@ -602,20 +607,39 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), + "min_count_to_compile_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_aggregate_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_sort_description": lambda: random.choice([0, 3]), + "session_timezone": lambda: random.choice( + [ + # special non-deterministic around 1970 timezone, see [1]. 
+ # + # [1]: https://github.com/ClickHouse/ClickHouse/issues/42653 + "America/Mazatlan", + "America/Hermosillo", + "Mexico/BajaSur", + # These timezones had DST transitions on some unusual dates (e.g. 2000-01-15 12:00:00). + "Africa/Khartoum", + "Africa/Juba", + # server default that is randomized across all timezones + # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same. + get_localzone(), + ] + ), } @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} is_debug = BuildFlags.DEBUG in args.build_flags for setting, generator in SettingsRandomizer.settings.items(): if ( is_debug and setting == "allow_prefetched_read_pool_for_remote_filesystem" ): - random_settings.append(f"{setting}=0") + random_settings[setting] = 0 else: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -651,10 +675,10 @@ class MergeTreeSettingsRandomizer: @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} for setting, generator in MergeTreeSettingsRandomizer.settings.items(): if setting not in args.changed_merge_tree_settings: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -766,7 +790,14 @@ class TestCase: @staticmethod def cli_format_settings(settings_list) -> str: - return " ".join([f"--{setting}" for setting in settings_list]) + out = [] + for k, v in settings_list.items(): + out.extend([f"--{k}", str(v)]) + return " ".join(out) + + @staticmethod + def http_format_settings(settings_list) -> str: + return urllib.parse.urlencode(settings_list) def has_show_create_table_in_test(self): return not subprocess.call(["grep", "-iq", "show create", self.case_file]) @@ -774,11 +805,12 @@ class TestCase: def add_random_settings(self, client_options): new_options = "" if self.randomize_settings: + http_params = self.http_format_settings(self.random_settings) if len(self.base_url_params) == 0: - os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings) + os.environ["CLICKHOUSE_URL_PARAMS"] = http_params else: os.environ["CLICKHOUSE_URL_PARAMS"] = ( - self.base_url_params + "&" + "&".join(self.random_settings) + self.base_url_params + "&" + http_params ) new_options += f" {self.cli_format_settings(self.random_settings)}" @@ -919,14 +951,18 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - # TODO: remove checking "no-upgrade-check" after 23.1 - elif args.upgrade_check and ( - "no-upgrade-check" in tags or "no-upgrade-check" in tags - ): - return FailureReason.NO_UPGRADE_CHECK - elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif ( + tags + and ("no-s3-storage-with-slow-build" in tags) + and args.s3_storage + and ( + BuildFlags.THREAD in args.build_flags + or BuildFlags.DEBUG in args.build_flags + ) + ): + return FailureReason.S3_STORAGE elif tags: for build_flag in args.build_flags: @@ -1163,8 +1199,18 @@ class TestCase: # This is for .sh tests os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename + query_params = "" + if "need-query-parameters" in self.tags: + query_params = ( + " --param_CLICKHOUSE_DATABASE=" + + database + + " --param_CLICKHOUSE_DATABASE_1=" + + database + + "_1" + ) + params = { - "client": client + " --database=" + database, + "client": client + " --database=" + database + query_params, "logs_level": 
server_logs_level, "options": client_options, "test": self.case_file, @@ -1192,15 +1238,6 @@ class TestCase: ).total_seconds() < args.timeout and proc.poll() is None: sleep(0.01) - need_drop_database = not args.database - if need_drop_database and args.no_drop_if_fail: - maybe_passed = ( - (proc.returncode == 0) - and (proc.stderr is None) - and (proc.stdout is None or "Exception" not in proc.stdout) - ) - need_drop_database = maybe_passed - debug_log = "" if os.path.exists(self.testcase_args.debug_log_file): with open(self.testcase_args.debug_log_file, "rb") as stream: @@ -1208,65 +1245,6 @@ class TestCase: debug_log += str(stream.read(), errors="replace", encoding="utf-8") debug_log += "\n" - if need_drop_database: - seconds_left = max( - args.timeout - (datetime.now() - start_time).total_seconds(), 20 - ) - - # Check if the test does not cleanup its tables. - # Only for newly added tests. Please extend this check to the old tests as well. - if self.case_file >= "02800": - leftover_tables = ( - clickhouse_execute( - args, - f"SHOW TABLES FROM {database}", - timeout=seconds_left, - settings={ - "log_comment": args.testcase_basename, - }, - ) - .decode() - .replace("\n", ", ") - ) - - if len(leftover_tables) != 0: - raise Exception( - f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally." - ) - - drop_database_query = f"DROP DATABASE IF EXISTS {database}" - if args.replicated_database: - drop_database_query += " ON CLUSTER test_cluster_database_replicated" - - try: - # It's possible to get an error "New table appeared in database being dropped or detached. Try again." - for _ in range(1, 60): - try: - clickhouse_execute( - args, - drop_database_query, - timeout=seconds_left, - settings={ - "log_comment": args.testcase_basename, - }, - ) - except HTTPError as e: - if need_retry(args, e.message, e.message, 0): - continue - raise - break - - except socket.timeout: - total_time = (datetime.now() - start_time).total_seconds() - return ( - None, - "", - f"Timeout dropping database {database} after test", - debug_log, - total_time, - ) - shutil.rmtree(args.test_tmp_dir) - total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. 
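Since the randomized settings are now kept in a dict instead of pre-rendered strings, the same mapping can be emitted either as extra URL parameters (for HTTP clients) or as CLI flags, which is exactly what http_format_settings and cli_format_settings above do. A small illustration with invented setting values:

    import urllib.parse

    random_settings = {"max_insert_threads": 2, "prefer_localhost_replica": 0}  # example values

    # http_format_settings: the dict is appended to CLICKHOUSE_URL_PARAMS
    print(urllib.parse.urlencode(random_settings))
    # max_insert_threads=2&prefer_localhost_replica=0

    # cli_format_settings: the same dict becomes "--name value" pairs for clickhouse-client
    out = []
    for k, v in random_settings.items():
        out.extend([f"--{k}", str(v)])
    print(" ".join(out))
    # --max_insert_threads 2 --prefer_localhost_replica 0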
@@ -1295,6 +1273,8 @@ class TestCase: return proc, stdout, stderr, debug_log, total_time def run(self, args, suite, client_options, server_logs_level): + start_time = datetime.now() + try: skip_reason = self.should_skip_test(suite) if skip_reason is not None: @@ -1330,40 +1310,118 @@ class TestCase: if result.status == TestStatus.FAIL: result.description = self.add_info_about_settings(result.description) + + self._cleanup(result.status == TestStatus.OK) + return result except KeyboardInterrupt as e: raise e except HTTPError: + total_time = (datetime.now() - start_time).total_seconds() return TestResult( self.name, TestStatus.FAIL, FailureReason.INTERNAL_QUERY_FAIL, - 0.0, + total_time, + self.add_info_about_settings( + self.get_description_from_exception_info(sys.exc_info()) + ), + ) + except socket.timeout: + total_time = (datetime.now() - start_time).total_seconds() + return TestResult( + self.name, + TestStatus.FAIL, + FailureReason.INTERNAL_QUERY_FAIL, + total_time, self.add_info_about_settings( self.get_description_from_exception_info(sys.exc_info()) ), ) except (ConnectionError, http.client.ImproperConnectionState): + total_time = (datetime.now() - start_time).total_seconds() return TestResult( self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, - 0.0, + total_time, self.add_info_about_settings( self.get_description_from_exception_info(sys.exc_info()) ), ) except Exception: + total_time = (datetime.now() - start_time).total_seconds() return TestResult( self.name, TestStatus.UNKNOWN, FailureReason.INTERNAL_ERROR, - 0.0, + total_time, self.get_description_from_exception_info(sys.exc_info()), ) finally: self.remove_random_settings_from_env() + def _cleanup(self, passed): + args = self.testcase_args + + need_cleanup = not args.database + if need_cleanup and args.no_drop_if_fail: + need_cleanup = passed + + if not need_cleanup: + return + + time_passed = (datetime.now() - args.testcase_start_time).total_seconds() + timeout = max(args.timeout - time_passed, 20) + + self._cleanup_database(args, timeout) + shutil.rmtree(args.test_tmp_dir) + + def _cleanup_database(self, args, timeout): + database = args.testcase_database + + # Check if the test does not cleanup its tables. + # Only for newly added tests. Please extend this check to the old tests as well. + if self.case_file >= "02800": + leftover_tables = ( + clickhouse_execute( + args, + f"SHOW TABLES FROM {database}", + timeout=timeout, + settings={ + "log_comment": args.testcase_basename, + }, + ) + .decode() + .replace("\n", ", ") + ) + + if len(leftover_tables) != 0: + raise Exception( + f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally." + ) + + drop_database_query = f"DROP DATABASE IF EXISTS {database}" + if args.replicated_database: + drop_database_query += " ON CLUSTER test_cluster_database_replicated" + + # It's possible to get an error "New table appeared in database being dropped or detached. Try again." 
+ for _ in range(1, 60): + try: + clickhouse_execute( + args, + drop_database_query, + timeout=timeout, + settings={ + "log_comment": args.testcase_basename, + }, + ) + except HTTPError as e: + if need_retry(args, e.message, e.message, 0): + continue + raise + break + class TestSuite: @staticmethod @@ -1424,13 +1482,13 @@ class TestSuite: else: raise Exception(f"Unknown file_extension: {filename}") - def parse_tags_from_line(line, comment_sign): + def parse_tags_from_line(line, comment_sign) -> Set[str]: if not line.startswith(comment_sign): - return None + return set() tags_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 tags_prefix = "Tags:" if not tags_str.startswith(tags_prefix): - return None + return set() tags_str = tags_str[len(tags_prefix) :] # noqa: ignore E203 tags = tags_str.split(",") tags = {tag.strip() for tag in tags} @@ -1448,12 +1506,23 @@ class TestSuite: def load_tags_from_file(filepath): comment_sign = get_comment_sign(filepath) + need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - line = find_tag_line(file) + tag_line = find_tag_line(file) except UnicodeDecodeError: return [] - return parse_tags_from_line(line, comment_sign) + try: + if filepath.endswith(".sql"): + for line in file: + if "{CLICKHOUSE_DATABASE" in line: + need_query_params = True + except UnicodeDecodeError: + pass + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + return parsed_tags all_tags = {} start_time = datetime.now() @@ -2119,7 +2188,7 @@ def reportLogStats(args): print("\n") query = """ - SELECT message_format_string, count(), substr(any(message), 1, 120) AS any_message + SELECT message_format_string, count(), any(message) AS any_message FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s)) @@ -2448,7 +2517,7 @@ def parse_args(): parser.add_argument( "--no-drop-if-fail", action="store_true", - help="Do not drop database for test if test has failed (does not work if reference file mismatch)", + help="Do not drop database for test if test has failed", ) parser.add_argument( "--hide-db-name", diff --git a/tests/config/config.d/backups.xml b/tests/config/config.d/backups.xml index 48f7a256233..4da8edffd67 100644 --- a/tests/config/config.d/backups.xml +++ b/tests/config/config.d/backups.xml @@ -1,6 +1,13 @@ + + + + local + /var/lib/clickhouse/disks/backups/ + + + - default - /backups + backups diff --git a/tests/config/config.d/clusters.xml b/tests/config/config.d/clusters.xml index 031d6e64bc9..cfd4868f1dc 100644 --- a/tests/config/config.d/clusters.xml +++ b/tests/config/config.d/clusters.xml @@ -176,6 +176,38 @@ + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + 127.0.0.3 + 9000 + + + + false + + 127.0.0.4 + 9000 + + + 127.0.0.5 + 9000 + + + 127.0.0.6 + 9000 + + + diff --git a/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml new file mode 100644 index 00000000000..504841296a8 --- /dev/null +++ b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml @@ -0,0 +1,5 @@ + + + 3000 + + diff --git a/tests/config/config.d/filesystem_caches_path.xml b/tests/config/config.d/filesystem_caches_path.xml new file mode 100644 index 00000000000..ca946db2e0a --- /dev/null +++ b/tests/config/config.d/filesystem_caches_path.xml @@ -0,0 +1,3 @@ + + 
/var/lib/clickhouse/filesystem_caches/ + diff --git a/tests/config/config.d/metadata_cache.xml b/tests/config/config.d/metadata_cache.xml deleted file mode 100644 index ecaba37d0b0..00000000000 --- a/tests/config/config.d/metadata_cache.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - 268435456 - true - - diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index 9685512a12a..dd93a317a77 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -6,14 +6,18 @@ http://localhost:11111/test/test/ clickhouse clickhouse - 1 - 22548578304 + + cache + 1Gi + cached_s3/ + s3 + -
s3
+
cached_s3
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 8533fef9fc9..d976e46ff7b 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -72,6 +72,12 @@ + + + default + s3_disk + +
diff --git a/tests/config/config.d/text_log.xml b/tests/config/config.d/text_log.xml index dce4942d952..d3608c5439d 100644 --- a/tests/config/config.d/text_log.xml +++ b/tests/config/config.d/text_log.xml @@ -2,6 +2,6 @@ system text_log
- 7500 + 4000
diff --git a/tests/config/config.d/validate_tcp_client_information.xml b/tests/config/config.d/validate_tcp_client_information.xml new file mode 100644 index 00000000000..db7b644719a --- /dev/null +++ b/tests/config/config.d/validate_tcp_client_information.xml @@ -0,0 +1,3 @@ + + true + diff --git a/tests/config/install.sh b/tests/config/install.sh index d75a652f084..b65f2cc8dd1 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -21,7 +21,6 @@ ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_settings_prefixes.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_access_control_improvements.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/macros.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/disks.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ @@ -34,7 +33,6 @@ ln -sf $SRC_PATH/config.d/keeper_port.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/lost_forever_check.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/metadata_cache.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/prometheus.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ @@ -43,7 +41,7 @@ ln -sf $SRC_PATH/config.d/transactions.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/logger_test.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/logger_trace.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ @@ -58,7 +56,10 @@ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/filesystem_caches_path.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] diff --git a/tests/config/users.d/session_log_test.xml b/tests/config/users.d/session_log_test.xml index daddaa6e4b9..cc2c2c5fcde 100644 --- a/tests/config/users.d/session_log_test.xml +++ b/tests/config/users.d/session_log_test.xml @@ -17,7 +17,7 @@ - + ::1 127.0.0.1 diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index 440ae1aac1f..5b864babde3 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -1,4 +1,4 @@ -The list of funtions generated via following query +The list of functions generated via following query ``` clickhouse-client -q "select concat('\"', name, '\"') from system.functions union all select concat('\"', alias_to, '\"') from system.functions where alias_to != '' " > functions.dict @@ -10,5 +10,4 @@ The list of datatypes generated via following query: clickhouse-client -q "select concat('\"', name, '\"') from system.data_type_families union all select concat('\"', alias_to, '\"') from system.data_type_families where alias_to != '' " > datatypes.dict ``` - Then merge all dictionaries into one (all.dict) diff --git a/tests/instructions/easy_tasks_sorted_ru.md b/tests/instructions/easy_tasks_sorted_ru.md index 09ea48d0bd9..17e9708eef5 100644 --- a/tests/instructions/easy_tasks_sorted_ru.md +++ b/tests/instructions/easy_tasks_sorted_ru.md @@ -129,7 +129,7 @@ position с конца строки. Атомарно удаляет таблицу перед созданием новой, если такая была. -## * Приведение типов для IN (subquery). +## + Приведение типов для IN (subquery). `SELECT 1 IN (SELECT -1 UNION ALL SELECT 1)` @@ -205,12 +205,12 @@ https://clickhouse.com/docs/en/operations/table_engines/external_data/ ## Возможность задавать параметры соединений для табличных функций, движков таблиц и для реплик из отдельных разделов конфигурации. -## Настройка rollup_use_nulls. +## + Настройка rollup_use_nulls. + +Upd: it is named "group_by_use_nulls". ## + Настройка cast_keep_nullable. -## Функция bitEquals для сравнения произвольных типов данных побитово. +## Функция bitEquals для сравнения произвольных типов данных побитово ## Функция serialize для implementation specific non portable non backwards compatible сериализации любого типа данных в набор байт. - -## Функция bitEquals и оператор <=>. diff --git a/tests/integration/README.md b/tests/integration/README.md index f0160dcd444..af973d2b9fa 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -52,6 +52,8 @@ sudo -H pip install \ (highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-protobuf python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` +Some tests have other dependencies, e.g. spark. See docker/test/integration/runner/Dockerfile for how to install those. See docker/test/integration/runner/dockerd-entrypoint.sh for environment variables that need to be set (e.g. JAVA_PATH). + If you want to run the tests under a non-privileged user, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and re-login. (You must close all your sessions (for example, restart your computer)) To check, that you have access to Docker, run `docker ps`. 
@@ -90,7 +92,7 @@ plugins: repeat-0.9.1, xdist-2.5.0, forked-1.4.0, order-1.0.0, timeout-2.1.0 timeout: 900.0s timeout method: signal timeout func_only: False -collected 4 items +collected 4 items test_ssl_cert_authentication/test.py::test_https Copy common default production configuration from /clickhouse-config. Files: config.xml, users.xml PASSED diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index be4d019426a..5c3a7695119 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -9,6 +9,7 @@ import os import random import re import shutil +import string import subprocess import time import shlex @@ -313,6 +314,7 @@ class ClickhouseIntegrationTestsRunner: "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", "clickhouse/mysql-php-client", + "clickhouse/nginx-dav", "clickhouse/postgresql-java-client", ] @@ -429,19 +431,12 @@ class ClickhouseIntegrationTestsRunner: def _get_all_tests(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) - out_file = "all_tests.txt" + runner_opts = self._get_runner_opts() out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log") cmd = ( - "cd {repo_path}/tests/integration && " - "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} -- --setup-plan " - "| tee '{out_file_full}' | grep -F '::' | sed -r 's/ \(fixtures used:.*//g; s/^ *//g; s/ *$//g' " - "| grep -v -F 'SKIPPED' | sort --unique > {out_file}".format( - repo_path=repo_path, - runner_opts=self._get_runner_opts(), - image_cmd=image_cmd, - out_file=out_file, - out_file_full=out_file_full, - ) + f"cd {repo_path}/tests/integration && " + f"timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} -- --setup-plan " + f"| tee '{out_file_full}'" ) logging.info("Getting all tests with cmd '%s'", cmd) @@ -449,34 +444,19 @@ class ClickhouseIntegrationTestsRunner: cmd, shell=True ) - all_tests_file_path = "{repo_path}/tests/integration/{out_file}".format( - repo_path=repo_path, out_file=out_file - ) - if ( - not os.path.isfile(all_tests_file_path) - or os.path.getsize(all_tests_file_path) == 0 - ): - if os.path.isfile(out_file_full): - # log runner output - logging.info("runner output:") - with open(out_file_full, "r") as all_tests_full_file: - for line in all_tests_full_file: - line = line.rstrip() - if line: - logging.info("runner output: %s", line) - else: - logging.info("runner output '%s' is empty", out_file_full) + all_tests = set() + with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: + for line in all_tests_fd: + if ( + line[0] in string.whitespace # test names at the start of lines + or "::test" not in line # test names contain '::test' + or "SKIPPED" in line # pytest.mark.skip/-if + ): + continue + all_tests.add(line.strip()) - raise Exception( - "There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format( - all_tests_file_path - ) - ) + assert all_tests - all_tests = [] - with open(all_tests_file_path, "r") as all_tests_file: - for line in all_tests_file: - all_tests.append(line.strip()) return list(sorted(all_tests)) def _get_parallel_tests_skip_list(self, repo_path): diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b5f7aababc9..6e76270c607 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -17,15 +17,15 @@ def tune_local_port_range(): # Lots of services uses non privileged ports: # - hdfs -- 50020/50070/... 
# - minio - # - mysql - # - psql - # - # So instead of tuning all these thirdparty services, let's simply - # prohibit using such ports for outgoing connections, this should fix - # possible "Address already in use" errors. # # NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error. - run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + # NOTE: it is not inherited, so you may need to specify this in docker_compose_$SERVICE.yml + try: + run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + except Exception as ex: + logging.warning( + "Failed to run sysctl, tests may fail with EADDRINUSE %s", str(ex) + ) @pytest.fixture(autouse=True, scope="session") diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index fdeedb9a80d..8ba7b342020 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -144,6 +144,7 @@ class Client: user=None, password=None, database=None, + query_id=None, ): return self.get_query_request( sql, @@ -153,6 +154,7 @@ class Client: user=user, password=password, database=database, + query_id=query_id, ).get_answer_and_error() @@ -182,7 +184,8 @@ class CommandRequest: # we suppress stderror on client becase sometimes thread sanitizer # can print some debug information there env = {} - env["TSAN_OPTIONS"] = "verbosity=0" + env["ASAN_OPTIONS"] = "use_sigaltstack=0" + env["TSAN_OPTIONS"] = "use_sigaltstack=0 verbosity=0" self.process = sp.Popen( command, stdin=stdin_file, diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0448eb2437f..e5afe14497d 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3199,7 +3199,6 @@ class ClickHouseInstance: ): self.name = name self.base_cmd = cluster.base_cmd - self.base_dir = base_path self.docker_id = cluster.get_instance_docker_id(self.name) self.cluster = cluster self.hostname = hostname if hostname is not None else self.name @@ -3477,6 +3476,7 @@ class ClickHouseInstance: user=None, password=None, database=None, + query_id=None, ): logging.debug(f"Executing query {sql} on {self.name}") return self.client.query_and_get_answer_with_error( @@ -3487,6 +3487,7 @@ class ClickHouseInstance: user=user, password=password, database=database, + query_id=query_id, ) # Connects to the instance via HTTP interface, sends a query and returns the answer @@ -4194,14 +4195,6 @@ class ClickHouseInstance: ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"] ) - def put_users_config(self, config_path): - """Put new config (useful if you cannot put it at the start)""" - - instance_config_dir = p.abspath(p.join(self.path, "configs")) - users_d_dir = p.abspath(p.join(instance_config_dir, "users.d")) - config_path = p.join(self.base_dir, config_path) - shutil.copy(config_path, users_d_dir) - def create_dir(self): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml index daacd55887d..f40ed9ac6fa 100644 --- a/tests/integration/helpers/keeper_config1.xml +++ b/tests/integration/helpers/keeper_config1.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml index 53eb023dba5..d5bdb92a79d 100644 --- a/tests/integration/helpers/keeper_config2.xml +++ b/tests/integration/helpers/keeper_config2.xml @@ -18,6 
+18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_config3.xml b/tests/integration/helpers/keeper_config3.xml index 1db091c12bc..aa69b554660 100644 --- a/tests/integration/helpers/keeper_config3.xml +++ b/tests/integration/helpers/keeper_config3.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 93ea3fa74b7..a1d20e0392b 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -1,6 +1,185 @@ +import io +import subprocess import socket import time +import typing as tp +import contextlib +import select from kazoo.client import KazooClient +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.client import CommandRequest + + +def execute_keeper_client_query( + cluster: ClickHouseCluster, node: ClickHouseInstance, query: str +) -> str: + request = CommandRequest( + [ + cluster.server_bin_path, + "keeper-client", + "--host", + str(cluster.get_instance_ip(node.name)), + "--port", + str(cluster.zookeeper_port), + "-q", + query, + ], + stdin="", + ) + + return request.get_answer() + + +class KeeperException(Exception): + pass + + +class KeeperClient(object): + SEPARATOR = b"\a\a\a\a\n" + + def __init__(self, bin_path: str, host: str, port: int): + self.bin_path = bin_path + self.host = host + self.port = port + + self.proc = subprocess.Popen( + [ + bin_path, + "keeper-client", + "--host", + host, + "--port", + str(port), + "--log-level", + "error", + "--tests-mode", + "--no-confirmation", + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + self.poller = select.epoll() + self.poller.register(self.proc.stdout) + self.poller.register(self.proc.stderr) + + self._fd_nums = { + self.proc.stdout.fileno(): self.proc.stdout, + self.proc.stderr.fileno(): self.proc.stderr, + } + + self.stopped = False + + def execute_query(self, query: str, timeout: float = 10.0) -> str: + output = io.BytesIO() + + self.proc.stdin.write(query.encode() + b"\n") + self.proc.stdin.flush() + + events = self.poller.poll(timeout) + if not events: + raise TimeoutError(f"Keeper client returned no output") + + for fd_num, event in events: + if event & (select.EPOLLIN | select.EPOLLPRI): + file = self._fd_nums[fd_num] + + if file == self.proc.stdout: + while True: + chunk = file.readline() + if chunk.endswith(self.SEPARATOR): + break + + output.write(chunk) + + elif file == self.proc.stderr: + assert self.proc.stdout.readline() == self.SEPARATOR + raise KeeperException(self.proc.stderr.readline().strip().decode()) + + else: + raise ValueError(f"Failed to read from pipe. 
Flag {event}") + + data = output.getvalue().strip().decode() + return data + + def cd(self, path: str, timeout: float = 10.0): + self.execute_query(f"cd {path}", timeout) + + def ls(self, path: str, timeout: float = 10.0) -> list[str]: + return self.execute_query(f"ls {path}", timeout).split(" ") + + def create(self, path: str, value: str, timeout: float = 10.0): + self.execute_query(f"create {path} {value}", timeout) + + def get(self, path: str, timeout: float = 10.0) -> str: + return self.execute_query(f"get {path}", timeout) + + def exists(self, path: str, timeout: float = 10.0) -> bool: + return bool(int(self.execute_query(f"exists {path}", timeout))) + + def stop(self): + if not self.stopped: + self.stopped = True + self.proc.communicate(b"exit\n", timeout=10.0) + + def sync(self, path: str, timeout: float = 10.0): + self.execute_query(f"sync {path}", timeout) + + def touch(self, path: str, timeout: float = 10.0): + self.execute_query(f"touch {path}", timeout) + + def find_big_family(self, path: str, n: int = 10, timeout: float = 10.0) -> str: + return self.execute_query(f"find_big_family {path} {n}", timeout) + + def find_super_nodes(self, threshold: int, timeout: float = 10.0) -> str: + return self.execute_query(f"find_super_nodes {threshold}", timeout) + + def delete_stale_backups(self, timeout: float = 10.0) -> str: + return self.execute_query("delete_stale_backups", timeout) + + def reconfig( + self, + joining: tp.Optional[str], + leaving: tp.Optional[str], + new_members: tp.Optional[str], + timeout: float = 10.0, + ) -> str: + if bool(joining) + bool(leaving) + bool(new_members) != 1: + raise ValueError( + "Exactly one of joining, leaving or new_members must be specified" + ) + + if joining is not None: + operation = "add" + elif leaving is not None: + operation = "remove" + elif new_members is not None: + operation = "set" + else: + raise ValueError( + "At least one of joining, leaving or new_members must be specified" + ) + + return self.execute_query( + f"reconfig {operation} {joining or leaving or new_members}", timeout + ) + + @classmethod + @contextlib.contextmanager + def from_cluster( + cls, cluster: ClickHouseCluster, keeper_node: str, port: tp.Optional[int] = None + ) -> "KeeperClient": + client = cls( + cluster.server_bin_path, + cluster.get_instance_ip(keeper_node), + port or cluster.zookeeper_port, + ) + + try: + yield client + finally: + client.stop() def get_keeper_socket(cluster, node, port=9181): @@ -70,14 +249,14 @@ def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient: return _fake -def get_config_str(zk: KazooClient) -> str: +def get_config_str(zk: KeeperClient) -> str: """ Return decoded contents of /keeper/config node """ - return zk.get("/keeper/config")[0].decode("utf-8") + return zk.get("/keeper/config") -def wait_configs_equal(left_config: str, right_zk: KazooClient, timeout: float = 30.0): +def wait_configs_equal(left_config: str, right_zk: KeeperClient, timeout: float = 30.0): """ Check whether get /keeper/config result in left_config is equal to get /keeper/config on right_zk ZK connection. 
diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 8ff4f9e9203..206f960293f 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -7,11 +7,18 @@ import urllib.parse import http.server import socketserver import string +import socket +import struct INF_COUNT = 100000000 +def _and_then(value, func): + assert callable(func) + return None if value is None else func(value) + + class MockControl: def __init__(self, cluster, container, port): self._cluster = cluster @@ -30,8 +37,8 @@ class MockControl: ) assert response == "OK", response - def setup_error_at_object_upload(self, count=None, after=None): - url = f"http://localhost:{self._port}/mock_settings/error_at_object_upload?nothing=1" + def setup_action(self, when, count=None, after=None, action=None, action_args=None): + url = f"http://localhost:{self._port}/mock_settings/{when}?nothing=1" if count is not None: url += f"&count={count}" @@ -39,25 +46,12 @@ class MockControl: if after is not None: url += f"&after={after}" - response = self._cluster.exec_in_container( - self._cluster.get_container_id(self._container), - [ - "curl", - "-s", - url, - ], - nothrow=True, - ) - assert response == "OK", response + if action is not None: + url += f"&action={action}" - def setup_error_at_part_upload(self, count=None, after=None): - url = f"http://localhost:{self._port}/mock_settings/error_at_part_upload?nothing=1" - - if count is not None: - url += f"&count={count}" - - if after is not None: - url += f"&after={after}" + if action_args is not None: + for x in action_args: + url += f"&action_args={x}" response = self._cluster.exec_in_container( self._cluster.get_container_id(self._container), @@ -70,22 +64,14 @@ class MockControl: ) assert response == "OK", response - def setup_error_at_create_multi_part_upload(self, count=None): - url = f"http://localhost:{self._port}/mock_settings/error_at_create_multi_part_upload" + def setup_at_object_upload(self, **kwargs): + self.setup_action("at_object_upload", **kwargs) - if count is not None: - url += f"?count={count}" + def setup_at_part_upload(self, **kwargs): + self.setup_action("at_part_upload", **kwargs) - response = self._cluster.exec_in_container( - self._cluster.get_container_id(self._container), - [ - "curl", - "-s", - url, - ], - nothrow=True, - ) - assert response == "OK", response + def setup_at_create_multi_part_upload(self, **kwargs): + self.setup_action("at_create_multi_part_upload", **kwargs) def setup_fake_puts(self, part_length): response = self._cluster.exec_in_container( @@ -140,8 +126,14 @@ class MockControl: class _ServerRuntime: class SlowPut: def __init__( - self, probability_=None, timeout_=None, minimal_length_=None, count_=None + self, + lock, + probability_=None, + timeout_=None, + minimal_length_=None, + count_=None, ): + self.lock = lock self.probability = probability_ if probability_ is not None else 1 self.timeout = timeout_ if timeout_ is not None else 0.1 self.minimal_length = minimal_length_ if minimal_length_ is not None else 0 @@ -156,42 +148,135 @@ class _ServerRuntime: ) def get_timeout(self, content_length): - if content_length > self.minimal_length: - if self.count > 0: - if ( - _runtime.slow_put.probability == 1 - or random.random() <= _runtime.slow_put.probability - ): - self.count -= 1 - return _runtime.slow_put.timeout + with self.lock: + if content_length > self.minimal_length: + if self.count > 0: + if ( + _runtime.slow_put.probability == 
1 + or random.random() <= _runtime.slow_put.probability + ): + self.count -= 1 + return _runtime.slow_put.timeout return None + class Expected500ErrorAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "ExpectedError" + "mock s3 injected error" + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(data) + + class RedirectAction: + def __init__(self, host="localhost", port=1): + self.dst_host = _and_then(host, str) + self.dst_port = _and_then(port, int) + + def inject_error(self, request_handler): + request_handler.redirect(host=self.dst_host, port=self.dst_port) + + class ConnectionResetByPeerAction: + def __init__(self, with_partial_data=None): + self.partial_data = "" + if with_partial_data is not None and with_partial_data == "1": + self.partial_data = ( + '\n' + "\n" + ) + + def inject_error(self, request_handler): + request_handler.read_all_input() + + if self.partial_data: + request_handler.send_response(200) + request_handler.send_header("Content-Type", "text/xml") + request_handler.send_header("Content-Length", 10000) + request_handler.end_headers() + request_handler.wfile.write(bytes(self.partial_data, "UTF-8")) + + time.sleep(1) + request_handler.connection.setsockopt( + socket.SOL_SOCKET, socket.SO_LINGER, struct.pack("ii", 1, 0) + ) + request_handler.connection.close() + + class BrokenPipeAction: + def inject_error(self, request_handler): + # partial read + self.rfile.read(50) + + time.sleep(1) + request_handler.connection.setsockopt( + socket.SOL_SOCKET, socket.SO_LINGER, struct.pack("ii", 1, 0) + ) + request_handler.connection.close() + + class ConnectionRefusedAction(RedirectAction): + pass + class CountAfter: - def __init__(self, count_=None, after_=None): + def __init__( + self, lock, count_=None, after_=None, action_=None, action_args_=[] + ): + self.lock = lock + self.count = count_ if count_ is not None else INF_COUNT self.after = after_ if after_ is not None else 0 + self.action = action_ + self.action_args = action_args_ + + if self.action == "connection_refused": + self.error_handler = _ServerRuntime.ConnectionRefusedAction() + elif self.action == "connection_reset_by_peer": + self.error_handler = _ServerRuntime.ConnectionResetByPeerAction( + *self.action_args + ) + elif self.action == "broken_pipe": + self.error_handler = _ServerRuntime.BrokenPipeAction() + elif self.action == "redirect_to": + self.error_handler = _ServerRuntime.RedirectAction(*self.action_args) + else: + self.error_handler = _ServerRuntime.Expected500ErrorAction() + + @staticmethod + def from_cgi_params(lock, params): + return _ServerRuntime.CountAfter( + lock=lock, + count_=_and_then(params.get("count", [None])[0], int), + after_=_and_then(params.get("after", [None])[0], int), + action_=params.get("action", [None])[0], + action_args_=params.get("action_args", []), + ) def __str__(self): - return f"count:{self.count} after:{self.after}" + return f"count:{self.count} after:{self.after} action:{self.action} action_args:{self.action_args}" def has_effect(self): - if self.after: - self.after -= 1 - if self.after == 0: - if self.count: - self.count -= 1 - return True - return False + with self.lock: + if self.after: + self.after -= 1 + if self.after == 0: + if self.count: + self.count -= 1 + return True + return False + + def inject_error(self, request_handler): + self.error_handler.inject_error(request_handler) def __init__(self): self.lock = threading.Lock() - self.error_at_part_upload = None - self.error_at_object_upload = None + self.at_part_upload = 
None + self.at_object_upload = None self.fake_put_when_length_bigger = None self.fake_uploads = dict() self.slow_put = None self.fake_multipart_upload = None - self.error_at_create_multi_part_upload = None + self.at_create_multi_part_upload = None def register_fake_upload(self, upload_id, key): with self.lock: @@ -205,23 +290,18 @@ class _ServerRuntime: def reset(self): with self.lock: - self.error_at_part_upload = None - self.error_at_object_upload = None + self.at_part_upload = None + self.at_object_upload = None self.fake_put_when_length_bigger = None self.fake_uploads = dict() self.slow_put = None self.fake_multipart_upload = None - self.error_at_create_multi_part_upload = None + self.at_create_multi_part_upload = None _runtime = _ServerRuntime() -def _and_then(value, func): - assert callable(func) - return None if value is None else func(value) - - def get_random_string(length): # choose from all lowercase letter letters = string.ascii_lowercase @@ -239,7 +319,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): def _ping(self): self._ok() - def _read_out(self): + def read_all_input(self): content_length = int(self.headers.get("Content-Length", 0)) to_read = content_length while to_read > 0: @@ -250,36 +330,36 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): str(self.rfile.read(size)) to_read -= size - def _redirect(self): - self._read_out() + def redirect(self, host=None, port=None): + if host is None and port is None: + host = self.server.upstream_host + port = self.server.upstream_port + + self.read_all_input() self.send_response(307) - url = ( - f"http://{self.server.upstream_host}:{self.server.upstream_port}{self.path}" - ) + url = f"http://{host}:{port}{self.path}" + self.log_message("redirect to %s", url) self.send_header("Location", url) self.end_headers() self.wfile.write(b"Redirected") - def _error(self, data): - self._read_out() + def write_error(self, data, content_length=None): + if content_length is None: + content_length = len(data) + self.log_message("write_error %s", data) + self.read_all_input() self.send_response(500) self.send_header("Content-Type", "text/xml") + self.send_header("Content-Length", str(content_length)) self.end_headers() - self.wfile.write(bytes(data, "UTF-8")) - - def _error_expected_500(self): - self._error( - '' - "" - "ExpectedError" - "mock s3 injected error" - "txfbd566d03042474888193-00608d7537" - "" - ) + if data: + self.wfile.write(bytes(data, "UTF-8")) def _fake_put_ok(self): - self._read_out() + self.log_message("fake put") + + self.read_all_input() self.send_response(200) self.send_header("Content-Type", "text/xml") @@ -288,7 +368,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.end_headers() def _fake_uploads(self, path, upload_id): - self._read_out() + self.read_all_input() parts = [x for x in path.split("/") if x] bucket = parts[0] @@ -310,7 +390,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.wfile.write(bytes(data, "UTF-8")) def _fake_post_ok(self, path): - self._read_out() + self.read_all_input() parts = [x for x in path.split("/") if x] bucket = parts[0] @@ -338,22 +418,22 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): path = [x for x in parts.path.split("/") if x] assert path[0] == "mock_settings", path if len(path) < 2: - return self._error("_mock_settings: wrong command") + return self.write_error("_mock_settings: wrong command") - if path[1] == "error_at_part_upload": + if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, 
keep_blank_values=False) - _runtime.error_at_part_upload = _ServerRuntime.CountAfter( - count_=_and_then(params.get("count", [None])[0], int), - after_=_and_then(params.get("after", [None])[0], int), + _runtime.at_part_upload = _ServerRuntime.CountAfter.from_cgi_params( + _runtime.lock, params ) + self.log_message("set at_part_upload %s", _runtime.at_part_upload) return self._ok() - if path[1] == "error_at_object_upload": + if path[1] == "at_object_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - _runtime.error_at_object_upload = _ServerRuntime.CountAfter( - count_=_and_then(params.get("count", [None])[0], int), - after_=_and_then(params.get("after", [None])[0], int), + _runtime.at_object_upload = _ServerRuntime.CountAfter.from_cgi_params( + _runtime.lock, params ) + self.log_message("set at_object_upload %s", _runtime.at_object_upload) return self._ok() if path[1] == "fake_puts": @@ -361,11 +441,13 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): _runtime.fake_put_when_length_bigger = int( params.get("when_length_bigger", [1024 * 1024])[0] ) + self.log_message("set fake_puts %s", _runtime.fake_put_when_length_bigger) return self._ok() if path[1] == "slow_put": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.slow_put = _ServerRuntime.SlowPut( + lock=_runtime.lock, minimal_length_=_and_then(params.get("minimal_length", [None])[0], int), probability_=_and_then(params.get("probability", [None])[0], float), timeout_=_and_then(params.get("timeout", [None])[0], float), @@ -376,20 +458,26 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if path[1] == "setup_fake_multpartuploads": _runtime.fake_multipart_upload = True + self.log_message("set setup_fake_multpartuploads") return self._ok() - if path[1] == "error_at_create_multi_part_upload": + if path[1] == "at_create_multi_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - _runtime.error_at_create_multi_part_upload = int( - params.get("count", [INF_COUNT])[0] + _runtime.at_create_multi_part_upload = ( + _ServerRuntime.CountAfter.from_cgi_params(_runtime.lock, params) + ) + self.log_message( + "set at_create_multi_part_upload %s", + _runtime.at_create_multi_part_upload, ) return self._ok() if path[1] == "reset": _runtime.reset() + self.log_message("reset") return self._ok() - return self._error("_mock_settings: wrong command") + return self.write_error("_mock_settings: wrong command") def do_GET(self): if self.path == "/": @@ -398,7 +486,8 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if self.path.startswith("/mock_settings"): return self._mock_settings() - return self._redirect() + self.log_message("get redirect") + return self.redirect() def do_PUT(self): content_length = int(self.headers.get("Content-Length", 0)) @@ -414,30 +503,52 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): upload_id = params.get("uploadId", [None])[0] if upload_id is not None: - if _runtime.error_at_part_upload is not None: - if _runtime.error_at_part_upload.has_effect(): - return self._error_expected_500() + if _runtime.at_part_upload is not None: + self.log_message( + "put at_part_upload %s, %s, %s", + _runtime.at_part_upload, + upload_id, + parts, + ) + + if _runtime.at_part_upload.has_effect(): + return _runtime.at_part_upload.inject_error(self) if _runtime.fake_multipart_upload: if _runtime.is_fake_upload(upload_id, parts.path): return self._fake_put_ok() else: - if _runtime.error_at_object_upload is not None: - if 
_runtime.error_at_object_upload.has_effect(): - return self._error_expected_500() + if _runtime.at_object_upload is not None: + if _runtime.at_object_upload.has_effect(): + self.log_message( + "put error_at_object_upload %s, %s", + _runtime.at_object_upload, + parts, + ) + return _runtime.at_object_upload.inject_error(self) if _runtime.fake_put_when_length_bigger is not None: if content_length > _runtime.fake_put_when_length_bigger: + self.log_message( + "put fake_put_when_length_bigger %s, %s, %s", + _runtime.fake_put_when_length_bigger, + content_length, + parts, + ) return self._fake_put_ok() - return self._redirect() + self.log_message( + "put redirect %s", + parts, + ) + return self.redirect() def do_POST(self): parts = urllib.parse.urlsplit(self.path) params = urllib.parse.parse_qs(parts.query, keep_blank_values=True) uploads = params.get("uploads", [None])[0] if uploads is not None: - if _runtime.error_at_create_multi_part_upload: - _runtime.error_at_create_multi_part_upload -= 1 - return self._error_expected_500() + if _runtime.at_create_multi_part_upload is not None: + if _runtime.at_create_multi_part_upload.has_effect(): + return _runtime.at_create_multi_part_upload.inject_error(self) if _runtime.fake_multipart_upload: upload_id = get_random_string(5) @@ -448,13 +559,13 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if _runtime.is_fake_upload(upload_id, parts.path): return self._fake_post_ok(parts.path) - return self._redirect() + return self.redirect() def do_HEAD(self): - self._redirect() + self.redirect() def do_DELETE(self): - self._redirect() + self.redirect() class _ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py new file mode 100644 index 00000000000..9059fda08ae --- /dev/null +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -0,0 +1,88 @@ +import os +import time + + +def check_proxy_logs( + cluster, proxy_instance, protocol, bucket, http_methods={"POST", "PUT", "GET"} +): + for i in range(10): + logs = cluster.get_container_logs(proxy_instance) + # Check with retry that all possible interactions with Minio are present + for http_method in http_methods: + if ( + logs.find(http_method + f" {protocol}://minio1:9001/root/data/{bucket}") + >= 0 + ): + return + time.sleep(1) + else: + assert False, f"{http_methods} method not found in logs of {proxy_instance}" + + +def wait_resolver(cluster): + for i in range(10): + response = cluster.exec_in_container( + cluster.get_container_id("resolver"), + [ + "curl", + "-s", + f"http://resolver:8080/hostname", + ], + nothrow=True, + ) + if response == "proxy1" or response == "proxy2": + return + time.sleep(i) + else: + assert False, "Resolver is not up" + + +# Runs simple proxy resolver in python env container. 
+def run_resolver(cluster, current_dir): + container_id = cluster.get_container_id("resolver") + cluster.copy_file_to_container( + container_id, + os.path.join(current_dir, "proxy-resolver", "resolver.py"), + "resolver.py", + ) + cluster.exec_in_container(container_id, ["python", "resolver.py"], detach=True) + + wait_resolver(cluster) + + +def build_s3_endpoint(protocol, bucket): + return f"{protocol}://minio1:9001/root/data/{bucket}/test.csv" + + +def perform_simple_queries(node, minio_endpoint): + node.query( + f""" + INSERT INTO FUNCTION + s3('{minio_endpoint}', 'minio', 'minio123', 'CSV', 'key String, value String') + VALUES ('color','red'),('size','10') + """ + ) + + assert ( + node.query( + f"SELECT * FROM s3('{minio_endpoint}', 'minio', 'minio123', 'CSV') FORMAT Values" + ) + == "('color','red'),('size','10')" + ) + + assert ( + node.query( + f"SELECT * FROM s3('{minio_endpoint}', 'minio', 'minio123', 'CSV') FORMAT Values" + ) + == "('color','red'),('size','10')" + ) + + +def simple_test(cluster, proxies, protocol, bucket): + minio_endpoint = build_s3_endpoint(protocol, bucket) + node = cluster.instances[f"{bucket}"] + + perform_simple_queries(node, minio_endpoint) + + for proxy in proxies: + check_proxy_logs(cluster, proxy, protocol, bucket) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 407fe7d1b01..d056225fee4 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,7 @@ "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_zookeeper_fallback_session/test.py::test_fallback_session", "test_global_overcommit_tracker/test.py::test_global_overcommit", @@ -69,8 +70,27 @@ "test_server_reload/test.py::test_remove_tcp_port", "test_keeper_map/test.py::test_keeper_map_without_zk", + + "test_replicated_merge_tree_wait_on_shutdown/test.py::test_shutdown_and_wait", "test_http_failover/test.py::test_url_destination_host_with_multiple_addrs", "test_http_failover/test.py::test_url_invalid_hostname", - "test_http_failover/test.py::test_url_ip_change" + "test_http_failover/test.py::test_url_ip_change", + + "test_system_logs/test_system_logs.py::test_max_size_0", + "test_system_logs/test_system_logs.py::test_reserved_size_greater_max_size", + "test_system_flush_logs/test.py::test_log_buffer_size_rows_flush_threshold", + "test_system_flush_logs/test.py::test_log_max_size", + "test_crash_log/test.py::test_pkill_query_log", + "test_crash_log/test.py::test_pkill", + + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_postgres", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_mysql", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http_named_session", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_grpc", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp_and_others", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_setting_in_query", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_client_suggestions_load" ] diff --git a/tests/integration/runner 
b/tests/integration/runner index 1b902803741..4c2b1054538 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -331,18 +331,8 @@ if __name__ == "__main__": if args.docker_compose_images_tags is not None: for img_tag in args.docker_compose_images_tags: [image, tag] = img_tag.split(":") - if image == "clickhouse/mysql-golang-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) - elif image == "clickhouse/dotnet-client": + if image == "clickhouse/dotnet-client": env_tags += "-e {}={} ".format("DOCKER_DOTNET_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-java-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-js-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-php-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) - elif image == "clickhouse/postgresql-java-client": - env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) elif image == "clickhouse/integration-helper": env_tags += "-e {}={} ".format("DOCKER_HELPER_TAG", tag) elif image == "clickhouse/integration-test": @@ -351,6 +341,18 @@ if __name__ == "__main__": env_tags += "-e {}={} ".format("DOCKER_KERBERIZED_HADOOP_TAG", tag) elif image == "clickhouse/kerberos-kdc": env_tags += "-e {}={} ".format("DOCKER_KERBEROS_KDC_TAG", tag) + elif image == "clickhouse/mysql-golang-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-java-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-js-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-php-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) + elif image == "clickhouse/nginx-dav": + env_tags += "-e {}={} ".format("DOCKER_NGINX_DAV_TAG", tag) + elif image == "clickhouse/postgresql-java-client": + env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) else: logging.info("Unknown image %s" % (image)) diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index be4d71502d2..5069468110c 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -22,7 +22,7 @@ def test_access_rights_for_function(): instance.query("CREATE USER A") instance.query("CREATE USER B") assert ( - "it's necessary to have grant CREATE FUNCTION ON *.*" + "it's necessary to have the grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user="A") ) @@ -32,7 +32,7 @@ def test_access_rights_for_function(): assert instance.query("SELECT MySum(1, 2)") == "3\n" assert ( - "it's necessary to have grant DROP FUNCTION ON *.*" + "it's necessary to have the grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user="B") ) @@ -44,7 +44,7 @@ def test_access_rights_for_function(): instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") assert ( - "it's necessary to have grant CREATE FUNCTION ON *.*" + "it's necessary to have the grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user="A") ) diff --git a/tests/integration/test_alter_moving_garbage/test.py b/tests/integration/test_alter_moving_garbage/test.py index af9fffbb74d..21be46a7e1b 100644 --- 
a/tests/integration/test_alter_moving_garbage/test.py +++ b/tests/integration/test_alter_moving_garbage/test.py @@ -218,22 +218,32 @@ def test_delete_race_leftovers(cluster): time.sleep(5) # Check that we correctly deleted all outdated parts and no leftovers on s3 - known_remote_paths = set( - node.query( - f"SELECT remote_path FROM system.remote_data_paths WHERE disk_name = 's32'" - ).splitlines() - ) - - all_remote_paths = set( - obj.object_name - for obj in cluster.minio_client.list_objects( - cluster.minio_bucket, "data2/", recursive=True + # Do it with retries because we delete blobs in the background + # and it can be race condition between removing from remote_data_paths and deleting blobs + all_remote_paths = set() + known_remote_paths = set() + for i in range(3): + known_remote_paths = set( + node.query( + f"SELECT remote_path FROM system.remote_data_paths WHERE disk_name = 's32'" + ).splitlines() ) - ) - # Some blobs can be deleted after we listed remote_data_paths - # It's alright, thus we check only that all remote paths are known - # (in other words, all remote paths is subset of known paths) + all_remote_paths = set( + obj.object_name + for obj in cluster.minio_client.list_objects( + cluster.minio_bucket, "data2/", recursive=True + ) + ) + + # Some blobs can be deleted after we listed remote_data_paths + # It's alright, thus we check only that all remote paths are known + # (in other words, all remote paths is subset of known paths) + if all_remote_paths == {p for p in known_remote_paths if p in all_remote_paths}: + break + + time.sleep(1) + assert all_remote_paths == {p for p in known_remote_paths if p in all_remote_paths} # Check that we have all data diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py b/tests/integration/test_backup_log/__init__.py similarity index 100% rename from tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py rename to tests/integration/test_backup_log/__init__.py diff --git a/tests/integration/test_backup_log/configs/config.d/backups.xml b/tests/integration/test_backup_log/configs/config.d/backups.xml new file mode 100644 index 00000000000..5e2e3ee9d71 --- /dev/null +++ b/tests/integration/test_backup_log/configs/config.d/backups.xml @@ -0,0 +1,5 @@ + + + /backups + + diff --git a/tests/integration/test_backup_log/configs/config.xml b/tests/integration/test_backup_log/configs/config.xml new file mode 100644 index 00000000000..e2c81f4068e --- /dev/null +++ b/tests/integration/test_backup_log/configs/config.xml @@ -0,0 +1,8 @@ + + + system + backup_log
+ toYYYYMM(event_date) + 0 +
+
diff --git a/tests/integration/test_backup_log/test.py b/tests/integration/test_backup_log/test.py new file mode 100644 index 00000000000..a1c09d8e091 --- /dev/null +++ b/tests/integration/test_backup_log/test.py @@ -0,0 +1,58 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) + +instance = cluster.add_instance( + "instance", + main_configs=["configs/config.xml", "configs/config.d/backups.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def backup_table(backup_name): + instance.query("CREATE DATABASE test") + instance.query("CREATE TABLE test.table(x UInt32) ENGINE=MergeTree ORDER BY x") + instance.query("INSERT INTO test.table SELECT number FROM numbers(10)") + return instance.query(f"BACKUP TABLE test.table TO {backup_name}").split("\t")[0] + + +def restore_table(backup_name): + return instance.query(f"RESTORE TABLE test.table FROM {backup_name}").split("\t")[0] + + +def test_backup_log(): + backup_name = "File('/backups/test_backup/')" + assert instance.query("SELECT * FROM system.tables WHERE name = 'backup_log'") == "" + + backup_id = backup_table(backup_name) + assert instance.query( + f"SELECT status, error FROM system.backup_log WHERE id='{backup_id}' ORDER BY event_date, event_time_microseconds" + ) == TSV([["CREATING_BACKUP", ""], ["BACKUP_CREATED", ""]]) + + instance.query("DROP TABLE test.table SYNC") + + restore_id = restore_table(backup_name) + assert instance.query( + f"SELECT status, error FROM system.backup_log WHERE id='{restore_id}' ORDER BY event_date, event_time_microseconds" + ) == TSV([["RESTORING", ""], ["RESTORED", ""]]) + + instance.restart_clickhouse() + + assert instance.query( + f"SELECT status, error FROM system.backup_log WHERE id='{backup_id}' ORDER BY event_date, event_time_microseconds" + ) == TSV([["CREATING_BACKUP", ""], ["BACKUP_CREATED", ""]]) + assert instance.query( + f"SELECT status, error FROM system.backup_log WHERE id='{restore_id}' ORDER BY event_date, event_time_microseconds" + ) == TSV([["RESTORING", ""], ["RESTORED", ""]]) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index c19cca4126a..4a26a470aab 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -882,7 +882,7 @@ def test_required_privileges(): instance.query("CREATE USER u1") backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON test.table" + expected_error = "necessary to have the grant BACKUP ON test.table" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE test.table TO {backup_name}", user="u1" ) @@ -890,12 +890,12 @@ def test_required_privileges(): instance.query("GRANT BACKUP ON test.table TO u1") instance.query(f"BACKUP TABLE test.table TO {backup_name}", user="u1") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE test.table FROM {backup_name}", user="u1" ) - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table2" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table2" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE 
test.table AS test.table2 FROM {backup_name}", user="u1" ) @@ -907,7 +907,7 @@ def test_required_privileges(): instance.query("DROP TABLE test.table") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u1" ) @@ -1014,14 +1014,14 @@ def test_system_users_required_privileges(): backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON system.users" + expected_error = "necessary to have the grant BACKUP ON system.users" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="u2" ) instance.query("GRANT BACKUP ON system.users TO u2") - expected_error = "necessary to have grant BACKUP ON system.roles" + expected_error = "necessary to have the grant BACKUP ON system.roles" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="u2" ) @@ -1035,7 +1035,7 @@ def test_system_users_required_privileges(): instance.query("DROP ROLE r1") expected_error = ( - "necessary to have grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*" + "necessary to have the grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*" ) assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u2" @@ -1043,7 +1043,7 @@ def test_system_users_required_privileges(): instance.query("GRANT CREATE USER, CREATE ROLE, ROLE ADMIN ON *.* TO u2") - expected_error = "necessary to have grant SELECT ON test.* WITH GRANT OPTION" + expected_error = "necessary to have the grant SELECT ON test.* WITH GRANT OPTION" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u2" ) @@ -1236,6 +1236,7 @@ def test_backup_all(exclude_system_log_tables): "transactions_info_log", "processors_profile_log", "asynchronous_insert_log", + "backup_log", ] exclude_from_backup += ["system." + table_name for table_name in log_tables] diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 39496b8a5c8..dfce2f15413 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -276,6 +276,37 @@ def test_table_with_parts_in_queue_considered_non_empty(): ) +def test_replicated_table_with_uuid_in_zkpath(): + node1.query( + "CREATE TABLE tbl ON CLUSTER 'cluster' (" + "x UInt8, y String" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}','{replica}')" + "ORDER BY x" + ) + + node1.query("INSERT INTO tbl VALUES (1, 'AA')") + node2.query("INSERT INTO tbl VALUES (2, 'BB')") + + backup_name = new_backup_name() + node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + # The table `tbl2` is expected to have a different UUID so it's ok to have both `tbl` and `tbl2` at the same time. 
+ node2.query(f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}") + + node1.query("INSERT INTO tbl2 VALUES (3, 'CC')") + + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl2") + + for instance in [node1, node2]: + assert instance.query("SELECT * FROM tbl ORDER BY x") == TSV( + [[1, "AA"], [2, "BB"]] + ) + assert instance.query("SELECT * FROM tbl2 ORDER BY x") == TSV( + [[1, "AA"], [2, "BB"], [3, "CC"]] + ) + + def test_replicated_table_with_not_synced_insert(): node1.query( "CREATE TABLE tbl ON CLUSTER 'cluster' (" @@ -561,7 +592,7 @@ def test_required_privileges(): node1.query("GRANT CLUSTER ON *.* TO u1") backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON default.tbl" + expected_error = "necessary to have the grant BACKUP ON default.tbl" assert expected_error in node1.query_and_get_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}", user="u1" ) @@ -571,7 +602,7 @@ def test_required_privileges(): node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl2" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON default.tbl2" assert expected_error in node1.query_and_get_error( f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) @@ -587,7 +618,7 @@ def test_required_privileges(): node1.query(f"DROP TABLE tbl2 ON CLUSTER 'cluster' SYNC") node1.query("REVOKE ALL FROM u1") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON default.tbl" assert expected_error in node1.query_and_get_error( f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) @@ -607,7 +638,7 @@ def test_system_users(): node1.query("CREATE USER u2 SETTINGS allow_backup=false") node1.query("GRANT CLUSTER ON *.* TO u2") - expected_error = "necessary to have grant BACKUP ON system.users" + expected_error = "necessary to have the grant BACKUP ON system.users" assert expected_error in node1.query_and_get_error( f"BACKUP TABLE system.users ON CLUSTER 'cluster' TO {backup_name}", user="u2" ) @@ -619,14 +650,16 @@ def test_system_users(): node1.query("DROP USER u1") - expected_error = "necessary to have grant CREATE USER ON *.*" + expected_error = "necessary to have the grant CREATE USER ON *.*" assert expected_error in node1.query_and_get_error( f"RESTORE TABLE system.users ON CLUSTER 'cluster' FROM {backup_name}", user="u2" ) node1.query("GRANT CREATE USER ON *.* TO u2") - expected_error = "necessary to have grant SELECT ON default.tbl WITH GRANT OPTION" + expected_error = ( + "necessary to have the grant SELECT ON default.tbl WITH GRANT OPTION" + ) assert expected_error in node1.query_and_get_error( f"RESTORE TABLE system.users ON CLUSTER 'cluster' FROM {backup_name}", user="u2" ) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index d0ce2e03016..c9f20333654 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -5,6 +5,7 @@ import time import concurrent from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV, assert_eq_with_retry +import re cluster = ClickHouseCluster(__file__) @@ -110,6 +111,73 @@ 
def create_and_fill_table(): nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") +def wait_for_fail_backup(node, backup_id, backup_name): + expected_errors = [ + "Concurrent backups not supported", + f"Backup {backup_name} already exists", + ] + status = node.query( + f"SELECT status FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + # It is possible that the second backup was picked up first, and then the async backup fails + if status == "BACKUP_FAILED": + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + assert any([expected_error in error for expected_error in expected_errors]) + return + elif status == "CREATING_BACKUP": + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + "BACKUP_FAILED", + sleep_time=2, + retry_count=50, + ) + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + assert re.search(f"Backup {backup_name} already exists", error) + return + else: + assert False, "Concurrent backups both passed, when one is expected to fail" + + +def wait_for_fail_restore(node, restore_id): + expected_errors = [ + "Concurrent restores not supported", + "Cannot restore the table default.tbl because it already contains some data", + ] + status = node.query( + f"SELECT status FROM system.backups WHERE id == '{restore_id}'" + ).rstrip("\n") + # It is possible that the second restore was picked up first, and then the async restore fails + if status == "RESTORE_FAILED": + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{restore_id}'" + ).rstrip("\n") + assert any([expected_error in error for expected_error in expected_errors]) + return + elif status == "RESTORING": + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id = '{restore_id}'", + "RESTORE_FAILED", + sleep_time=2, + retry_count=50, + ) + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{restore_id}'" + ).rstrip("\n") + assert re.search( + "Cannot restore the table default.tbl because it already contains some data", + error, + ) + return + else: + assert False, "Concurrent restores both passed, when one is expected to fail" + + # All the tests have concurrent backup/restores with same backup names # The same works with different backup names too.
Since concurrency # check comes before backup name check, separate tests are not added for different names @@ -133,13 +201,17 @@ def test_concurrent_backups_on_same_node(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", ] + if not error: + wait_for_fail_backup(nodes[0], id, backup_name) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -179,13 +251,18 @@ def test_concurrent_backups_on_different_nodes(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", ] + + if not error: + wait_for_fail_backup(nodes[1], id, backup_name) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -224,13 +301,18 @@ def test_concurrent_restores_on_same_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", ] + + if not error: + wait_for_fail_restore(nodes[0], restore_id) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -269,13 +351,18 @@ def test_concurrent_restores_on_different_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[1].query_and_get_error( + result, error = nodes[1].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", ] + + if not error: + wait_for_fail_restore(nodes[0], restore_id) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml index c1fd059bc67..d635e39e13f 100644 --- a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -21,6 +21,13 @@ minio123 33554432 + + cache + disk_s3 + /tmp/s3_cache/ + 1000000000 + 1 + @@ -37,11 +44,19 @@
+ + +
+ disk_s3_cache +
+
+
default disk_s3 disk_s3_plain + disk_s3_cache diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 8701bf0d832..f8ec39d240b 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -57,29 +57,42 @@ def get_events_for_query(query_id: str) -> Dict[str, int]: } +def format_settings(settings): + if not settings: + return "" + return "SETTINGS " + ",".join(f"{k}={v}" for k, v in settings.items()) + + def check_backup_and_restore( storage_policy, backup_destination, size=1000, - backup_name=None, + backup_settings=None, + restore_settings=None, + insert_settings=None, + optimize_table=True, ): + optimize_table_query = "OPTIMIZE TABLE data FINAL;" if optimize_table else "" + node.query( f""" DROP TABLE IF EXISTS data SYNC; CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; - INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size}; - OPTIMIZE TABLE data FINAL; + INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size} {format_settings(insert_settings)}; + {optimize_table_query} """ ) + try: backup_query_id = uuid.uuid4().hex node.query( - f"BACKUP TABLE data TO {backup_destination}", query_id=backup_query_id + f"BACKUP TABLE data TO {backup_destination} {format_settings(backup_settings)}", + query_id=backup_query_id, ) restore_query_id = uuid.uuid4().hex node.query( f""" - RESTORE TABLE data AS data_restored FROM {backup_destination}; + RESTORE TABLE data AS data_restored FROM {backup_destination} {format_settings(restore_settings)}; """, query_id=restore_query_id, ) @@ -114,6 +127,7 @@ def check_system_tables(): expected_disks = ( ("default", "local"), ("disk_s3", "s3"), + ("disk_s3_cache", "s3"), ("disk_s3_other_bucket", "s3"), ("disk_s3_plain", "s3_plain"), ) @@ -184,7 +198,6 @@ def test_backup_to_s3_multipart(): storage_policy, backup_destination, size=1000000, - backup_name=backup_name, ) assert node.contains_in_log( f"copyDataToS3File: Multipart upload has completed. 
Bucket: root, Key: data/backups/multipart/{backup_name}" @@ -312,3 +325,77 @@ def test_incremental_backup_append_table_def(): assert node.query("SELECT count(), sum(x) FROM data") == "100\t4950\n" assert "parts_to_throw_insert = 100" in node.query("SHOW CREATE TABLE data") + + +@pytest.mark.parametrize( + "in_cache_initially, allow_backup_read_cache, allow_s3_native_copy", + [ + (False, True, False), + (True, False, False), + (True, True, False), + (True, True, True), + ], +) +def test_backup_with_fs_cache( + in_cache_initially, allow_backup_read_cache, allow_s3_native_copy +): + storage_policy = "policy_s3_cache" + + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + + insert_settings = { + "enable_filesystem_cache_on_write_operations": int(in_cache_initially) + } + + backup_settings = { + "read_from_filesystem_cache": int(allow_backup_read_cache), + "allow_s3_native_copy": int(allow_s3_native_copy), + } + + restore_settings = {"allow_s3_native_copy": int(allow_s3_native_copy)} + + backup_events, restore_events = check_backup_and_restore( + storage_policy, + backup_destination, + size=10, + insert_settings=insert_settings, + optimize_table=False, + backup_settings=backup_settings, + restore_settings=restore_settings, + ) + + # print(f"backup_events = {backup_events}") + # print(f"restore_events = {restore_events}") + + # BACKUP never updates the filesystem cache but it may read it if `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` allows that. + if allow_backup_read_cache and in_cache_initially: + assert backup_events["CachedReadBufferReadFromCacheBytes"] > 0 + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + elif allow_backup_read_cache: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert backup_events["CachedReadBufferReadFromSourceBytes"] > 0 + else: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + + assert not "CachedReadBufferCacheWriteBytes" in backup_events + assert not "CachedWriteBufferCacheWriteBytes" in backup_events + + # RESTORE doesn't use the filesystem cache during write operations. 
+ # However while attaching parts it may use the cache while reading such files as "columns.txt" or "checksums.txt" or "primary.idx", + # see IMergeTreeDataPart::loadColumnsChecksumsIndexes() + if "CachedReadBufferReadFromSourceBytes" in restore_events: + assert ( + restore_events["CachedReadBufferReadFromSourceBytes"] + == restore_events["CachedReadBufferCacheWriteBytes"] + ) + + assert not "CachedReadBufferReadFromCacheBytes" in restore_events + + # "format_version.txt" is written when a table is created, + # see MergeTreeData::initializeDirectoriesAndFormatVersion() + if "CachedWriteBufferCacheWriteBytes" in restore_events: + assert restore_events["CachedWriteBufferCacheWriteBytes"] <= 1 diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py b/tests/integration/test_backup_restore_storage_policy/__init__.py similarity index 100% rename from tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py rename to tests/integration/test_backup_restore_storage_policy/__init__.py diff --git a/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml b/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml new file mode 100644 index 00000000000..de9f68cbe87 --- /dev/null +++ b/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml @@ -0,0 +1,33 @@ + + + + + local + /var/lib/disks/one/ + + + local + /var/lib/disks/two/ + + + + + + + one + + + + + + + two + + + + + + + /backups + + diff --git a/tests/integration/test_backup_restore_storage_policy/test.py b/tests/integration/test_backup_restore_storage_policy/test.py new file mode 100644 index 00000000000..4e9d309a220 --- /dev/null +++ b/tests/integration/test_backup_restore_storage_policy/test.py @@ -0,0 +1,76 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +backup_id_counter = 0 + +cluster = ClickHouseCluster(__file__) + +instance = cluster.add_instance( + "instance", + main_configs=["configs/storage_config.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP DATABASE IF EXISTS test") + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"File('/backups/{backup_id_counter}/')" + + +def create_table_backup(backup_name, storage_policy=None): + instance.query("CREATE DATABASE test") + create_query = "CREATE TABLE test.table(x UInt32) ENGINE=MergeTree ORDER BY x" + if storage_policy is not None: + create_query += f" SETTINGS storage_policy = '{storage_policy}'" + instance.query(create_query) + instance.query(f"INSERT INTO test.table SELECT number FROM numbers(10)") + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + instance.query("DROP TABLE test.table SYNC") + + +def restore_table(backup_name, storage_policy=None): + restore_query = f"RESTORE TABLE test.table FROM {backup_name}" + if storage_policy is not None: + restore_query += f" SETTINGS storage_policy = '{storage_policy}'" + instance.query(restore_query) + + +@pytest.mark.parametrize( + "origin_policy, restore_policy, expected_policy", + [ + (None, "", "default"), + (None, None, "default"), + (None, "policy1", "policy1"), + ("policy1", "policy1", "policy1"), + ("policy1", "policy2", "policy2"), + ("policy1", "", "default"), + ("policy1", None, "policy1"), + ], +) +def 
test_storage_policies(origin_policy, restore_policy, expected_policy): + backup_name = new_backup_name() + create_table_backup(backup_name, origin_policy) + restore_table(backup_name, restore_policy) + + assert ( + instance.query("SELECT storage_policy FROM system.tables WHERE name='table'") + == f"{expected_policy}\n" + ) diff --git a/tests/integration/test_s3_with_proxy/__init__.py b/tests/integration/test_backup_s3_storage_class/__init__.py similarity index 100% rename from tests/integration/test_s3_with_proxy/__init__.py rename to tests/integration/test_backup_s3_storage_class/__init__.py diff --git a/tests/integration/test_backup_s3_storage_class/test.py b/tests/integration/test_backup_s3_storage_class/test.py new file mode 100644 index 00000000000..2b11f20afc6 --- /dev/null +++ b/tests/integration/test_backup_s3_storage_class/test.py @@ -0,0 +1,47 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + stay_alive=True, + with_minio=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_backup_s3_storage_class(started_cluster): + node.query( + """ + CREATE TABLE test_s3_storage_class + ( + `id` UInt64, + `value` String + ) + ENGINE = MergeTree + ORDER BY id; + """, + ) + node.query( + """ + INSERT INTO test_s3_storage_class VALUES (1, 'a'); + """, + ) + result = node.query( + """ + BACKUP TABLE test_s3_storage_class TO S3('http://minio1:9001/root/data', 'minio', 'minio123') + SETTINGS s3_storage_class='STANDARD'; + """ + ) + + minio = cluster.minio_client + lst = list(minio.list_objects(cluster.minio_bucket, "data/.backup")) + assert lst[0].storage_class == "STANDARD" diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index fa24b146fec..607ac74d725 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -143,10 +143,16 @@ def test_string_functions(start_cluster): "position", "substring", "CAST", + "getTypeSerializationStreams", # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument # 22.8 Backward Incompatible Change: Extended range of Date32 "toDate32OrZero", "toDate32OrDefault", + # 23.9 changed the base64-handling library from Turbo base64 to aklomp-base64. They differ in the way they deal with base64 values + # that are not properly padded by '=', for example below test value v='foo'. (Depending on the specification/context, padding is + # mandatory or optional). The former lib produces a value based on implicit padding, the latter lib throws an error. 
+ "FROM_BASE64", + "base64Decode", ] functions = filter(lambda x: x not in excludes, functions) diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py index 9a70ebe0d48..bdf993ddedf 100644 --- a/tests/integration/test_broken_detached_part_clean_up/test.py +++ b/tests/integration/test_broken_detached_part_clean_up/test.py @@ -57,27 +57,30 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): ] ) - node.exec_in_container(["mkdir", f"{path_to_detached}../unexpected_all_42_1337_5"]) - node.exec_in_container( - [ - "touch", - "-t", - "1312031429.30", - f"{path_to_detached}../unexpected_all_42_1337_5", - ] - ) - result = node.exec_in_container( - ["stat", f"{path_to_detached}../unexpected_all_42_1337_5"] - ) - print(result) - assert "Modify: 2013-12-03" in result - node.exec_in_container( - [ - "mv", - f"{path_to_detached}../unexpected_all_42_1337_5", - f"{path_to_detached}unexpected_all_42_1337_5", - ] - ) + for name in [ + "unexpected_all_42_1337_5", + "deleting_all_123_456_7", + "covered-by-broken_all_12_34_5", + ]: + node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"]) + node.exec_in_container( + [ + "touch", + "-t", + "1312031429.30", + f"{path_to_detached}../{name}", + ] + ) + result = node.exec_in_container(["stat", f"{path_to_detached}../{name}"]) + print(result) + assert "Modify: 2013-12-03" in result + node.exec_in_container( + [ + "mv", + f"{path_to_detached}../{name}", + f"{path_to_detached}{name}", + ] + ) result = node.query( f"CHECK TABLE {table}", settings={"check_query_single_value_result": 0} @@ -87,17 +90,20 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): node.query(f"DETACH TABLE {table}") node.query(f"ATTACH TABLE {table}") - result = node.exec_in_container(["ls", path_to_detached]) - print(result) - assert f"{expect_broken_prefix}_all_3_3_0" in result - assert "all_1_1_0" in result - assert "trash" in result - assert "broken_all_fake" in result - assert "unexpected_all_42_1337_5" in result - - time.sleep(15) - assert node.contains_in_log( - "Removed broken detached part unexpected_all_42_1337_5 due to a timeout" + node.wait_for_log_line( + "Removing detached part deleting_all_123_456_7", + timeout=90, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + f"Removed broken detached part {expect_broken_prefix}_all_3_3_0 due to a timeout", + timeout=10, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + "Removed broken detached part unexpected_all_42_1337_5 due to a timeout", + timeout=10, + look_behind_lines=1000000, ) result = node.exec_in_container(["ls", path_to_detached]) @@ -106,7 +112,16 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): assert "all_1_1_0" in result assert "trash" in result assert "broken_all_fake" in result + assert "covered-by-broken_all_12_34_5" in result assert "unexpected_all_42_1337_5" not in result + assert "deleting_all_123_456_7" not in result + + node.query( + f"ALTER TABLE {table} DROP DETACHED PART 'covered-by-broken_all_12_34_5'", + settings={"allow_drop_detached": 1}, + ) + result = node.exec_in_container(["ls", path_to_detached]) + assert "covered-by-broken_all_12_34_5" not in result node.query(f"DROP TABLE {table} SYNC") diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index a80ad93d53d..c40e2a31a8b 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ 
b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -91,7 +91,7 @@ def get_counters(node, query_id, log_type="ExceptionWhileProcessing"): def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression): node = cluster.instances["node"] - broken_s3.setup_error_at_create_multi_part_upload() + broken_s3.setup_at_create_multi_part_upload() insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}" error = node.query_and_get_error( @@ -134,7 +134,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( node = cluster.instances["node"] broken_s3.setup_fake_multpartuploads() - broken_s3.setup_error_at_part_upload(count=1, after=2) + broken_s3.setup_at_part_upload(count=1, after=2) insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}" error = node.query_and_get_error( @@ -165,3 +165,302 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert count_create_multi_part_uploads == 1 assert count_upload_parts >= 2 assert count_s3_errors >= 2 + + +def test_when_s3_connection_refused_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload(count=3, after=2, action="connection_refused") + + insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 39 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload(count=1000, after=2, action="connection_refused") + insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 499" in error, error + assert ( + "Poco::Exception. 
Code: 1000, e.code() = 111, Connection refused" in error + ), error + + +@pytest.mark.parametrize("send_something", [True, False]) +def test_when_s3_connection_reset_by_peer_at_upload_is_retried( + cluster, broken_s3, send_something +): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}" + ) + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 39 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}_1" + ) + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Connection reset by peer." in error + or "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 104, Connection reset by peer" + in error + ), error + + +@pytest.mark.parametrize("send_something", [True, False]) +def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( + cluster, broken_s3, send_something +): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_create_multi_part_upload( + count=3, + after=0, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}" + ) + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 39 + assert count_s3_errors == 3 + + broken_s3.setup_at_create_multi_part_upload( + count=1000, + after=0, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}_1" + ) + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Connection reset by peer." in error + or "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 104, Connection reset by peer" + in error + ), error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 7 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Poco::Exception. Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error + ), error diff --git a/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml b/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml index 8791c5f6ee8..3f01f4ad7c5 100644 --- a/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml +++ b/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml @@ -1,6 +1,6 @@ - + node1 @@ -11,6 +11,52 @@ 9000 - + + + + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + node4 + 9000 + + + node5 + 9000 + + + node6 + 9000 + + + diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index 445eef64fcb..272e0183fe9 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -1,15 +1,12 @@ import pytest from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) -node2 = cluster.add_instance( - "node2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) +node1 = cluster.add_instance("node1", main_configs=["configs/remote_servers.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/remote_servers.xml"]) @pytest.fixture(scope="module") @@ -21,14 +18,50 @@ def start_cluster(): cluster.shutdown() -def test_remote(start_cluster): +def test_cluster(start_cluster): assert ( node1.query( - """SELECT hostName() FROM clusterAllReplicas("two_shards", system.one)""" + "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one)" ) == "node1\nnode2\n" ) assert ( - 
node1.query("""SELECT hostName() FROM cluster("two_shards", system.one)""") + node1.query("SELECT hostName() FROM cluster('one_shard_two_nodes', system.one)") == "node1\n" ) + assert ( + node2.query("SELECT hostName() FROM cluster('one_shard_two_nodes', system.one)") + == "node2\n" + ) + + +@pytest.mark.parametrize( + "cluster", + [ + pytest.param("one_shard_three_nodes"), + pytest.param("two_shards_three_nodes"), + ], +) +def test_skip_unavailable_replica(start_cluster, cluster): + assert ( + node1.query( + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=1" + ) + == "node1\nnode2\n" + ) + + +@pytest.mark.parametrize( + "cluster", + [ + pytest.param("one_shard_three_nodes"), + pytest.param("two_shards_three_nodes"), + ], +) +def test_error_on_unavailable_replica(start_cluster, cluster): + # clusterAllReplicas() consider each replica as shard + # so when skip_unavailable_shards=0 - any unavailable replica should lead to an error + with pytest.raises(QueryRuntimeException): + node1.query( + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=0" + ) diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 2b6fcf6cac2..1bd3561f24f 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -595,24 +595,3 @@ def execute_task(started_cluster, task, cmd_options): task.check() finally: zk.delete(task.zk_task_path, recursive=True) - - -# Tests -@pytest.mark.skip(reason="Too flaky :(") -def test_different_schema(started_cluster): - execute_task(started_cluster, TaskWithDifferentSchema(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_ttl_columns(started_cluster): - execute_task(started_cluster, TaskTTL(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_skip_index(started_cluster): - execute_task(started_cluster, TaskSkipIndex(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_ttl_move_to_volume(started_cluster): - execute_task(started_cluster, TaskTTLMoveToVolume(started_cluster), []) diff --git a/tests/integration/test_cluster_discovery/config/config.xml b/tests/integration/test_cluster_discovery/config/config.xml index 70cb010fe0e..a63ca3e5438 100644 --- a/tests/integration/test_cluster_discovery/config/config.xml +++ b/tests/integration/test_cluster_discovery/config/config.xml @@ -20,4 +20,8 @@
+ + + + diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml index f42bff335ef..09a512eb5a4 100644 --- a/tests/integration/test_composable_protocols/configs/config.xml +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -61,4 +61,6 @@ + + true diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index df74cfffa54..aa5a1e766e6 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -84,7 +84,7 @@ def test_connections(): assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" - data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find( bytearray("Hello, world", "latin-1") @@ -92,7 +92,7 @@ def test_connections(): >= 0 ) - data_user_allowed = "PROXY TCP4 123.123.123.123 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + data_user_allowed = "PROXY TCP4 123.123.123.123 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat(server.ip_address, 9100, bytearray(data_user_allowed, "latin-1")).find( bytearray("Hello, world", "latin-1") @@ -100,7 +100,7 @@ def test_connections(): >= 0 ) - data_user_restricted = "PROXY TCP4 127.0.0.1 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + data_user_restricted = "PROXY TCP4 127.0.0.1 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat( server.ip_address, 9100, bytearray(data_user_restricted, "latin-1") diff --git 
a/tests/integration/test_compression_codec_read/test.py b/tests/integration/test_compression_codec_read/test.py index 38cd61e241d..b39e5147d38 100644 --- a/tests/integration/test_compression_codec_read/test.py +++ b/tests/integration/test_compression_codec_read/test.py @@ -11,6 +11,7 @@ node1 = cluster.add_instance( tag="20.8.11.17", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_concurrent_backups_s3/test.py b/tests/integration/test_concurrent_backups_s3/test.py index b29058865c0..312ebdba5e3 100644 --- a/tests/integration/test_concurrent_backups_s3/test.py +++ b/tests/integration/test_concurrent_backups_s3/test.py @@ -24,6 +24,7 @@ def start_cluster(): cluster.shutdown() +@pytest.mark.skip(reason="broken test") def test_concurrent_backups(start_cluster): node.query("DROP TABLE IF EXISTS s3_test SYNC") columns = [f"column_{i} UInt64" for i in range(1000)] diff --git a/tests/integration/test_concurrent_threads_soft_limit/test.py b/tests/integration/test_concurrent_threads_soft_limit/test.py index 2f76f44ddc2..d1e233ee12f 100644 --- a/tests/integration/test_concurrent_threads_soft_limit/test.py +++ b/tests/integration/test_concurrent_threads_soft_limit/test.py @@ -51,6 +51,7 @@ def test_concurrent_threads_soft_limit_default(started_cluster): ) +@pytest.mark.skip(reason="broken test") def test_concurrent_threads_soft_limit_defined_50(started_cluster): node2.query( "SELECT count(*) FROM numbers_mt(10000000)", @@ -65,6 +66,7 @@ def test_concurrent_threads_soft_limit_defined_50(started_cluster): ) +@pytest.mark.skip(reason="broken test") def test_concurrent_threads_soft_limit_defined_1(started_cluster): node3.query( "SELECT count(*) FROM numbers_mt(10000000)", @@ -82,6 +84,7 @@ def test_concurrent_threads_soft_limit_defined_1(started_cluster): # In config_limit_reached.xml there is concurrent_threads_soft_limit=10 # Background query starts in a separate thread to reach this limit. 
# When this limit is reached the foreground query gets less than 5 queries despite the fact that it has settings max_threads=5 +@pytest.mark.skip(reason="broken test") def test_concurrent_threads_soft_limit_limit_reached(started_cluster): def background_query(): try: diff --git a/tests/integration/test_concurrent_ttl_merges/configs/users.xml b/tests/integration/test_concurrent_ttl_merges/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_concurrent_ttl_merges/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index 07e91dcbc9f..3a3981d65ba 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -7,10 +7,16 @@ from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/fast_background_pool.xml"], with_zookeeper=True + "node1", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/fast_background_pool.xml"], with_zookeeper=True + "node2", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) @@ -97,10 +103,6 @@ def test_no_ttl_merges_in_busy_pool(started_cluster): rows_count.append(int(node1.query("SELECT count() FROM test_ttl").strip())) time.sleep(0.5) - # at least several seconds we didn't run any TTL merges and rows count equal - # to the original value - assert sum([1 for count in rows_count if count == 30]) > 4 - assert_eq_with_retry(node1, "SELECT COUNT() FROM test_ttl", "0") node1.query("DROP TABLE test_ttl SYNC") diff --git a/tests/integration/test_storage_nats/__init__.py b/tests/integration/test_config_decryption/__init__.py similarity index 100% rename from tests/integration/test_storage_nats/__init__.py rename to tests/integration/test_config_decryption/__init__.py diff --git a/tests/integration/test_config_decryption/configs/config.xml b/tests/integration/test_config_decryption/configs/config.xml new file mode 100644 index 00000000000..4b0d3a77659 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config.xml @@ -0,0 +1,15 @@ + + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + diff --git a/tests/integration/test_config_decryption/configs/config.yaml b/tests/integration/test_config_decryption/configs/config.yaml new file mode 100644 index 00000000000..1b20b65b652 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config.yaml @@ -0,0 +1,13 @@ +encryption_codecs: + aes_128_gcm_siv: + key_hex: 00112233445566778899aabbccddeeff + aes_256_gcm_siv: + key_hex: 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + +max_table_size_to_drop: + '#text': 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + '@encrypted_by': AES_128_GCM_SIV + +max_partition_size_to_drop: + '@encrypted_by': AES_256_GCM_SIV + '#text': 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 diff --git 
a/tests/integration/test_config_decryption/configs/config_invalid_chars.xml b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml new file mode 100644 index 00000000000..53345b897dc --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml @@ -0,0 +1,16 @@ + + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + + + --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml new file mode 100644 index 00000000000..830c75f7378 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml @@ -0,0 +1,7 @@ + + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + diff --git a/tests/integration/test_config_decryption/configs/config_subnodes.xml b/tests/integration/test_config_decryption/configs/config_subnodes.xml new file mode 100644 index 00000000000..8213270f747 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_subnodes.xml @@ -0,0 +1,14 @@ + + + + + 00112233445566778899aabbccddeeff + + + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + + diff --git a/tests/integration/test_config_decryption/configs/config_wrong_method.xml b/tests/integration/test_config_decryption/configs/config_wrong_method.xml new file mode 100644 index 00000000000..b96c13d5105 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_wrong_method.xml @@ -0,0 +1,15 @@ + + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + diff --git a/tests/integration/test_config_decryption/test.py b/tests/integration/test_config_decryption/test.py new file mode 100644 index 00000000000..dd8cdc2e4e1 --- /dev/null +++ b/tests/integration/test_config_decryption/test.py @@ -0,0 +1,40 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", main_configs=["configs/config.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/config.yaml"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def check_node(started_cluster, node): + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_partition_size_to_drop'" + ) + == "40000000000\n" + ) + + +def test_successful_decryption_xml(started_cluster): + check_node(started_cluster, node1) + + +def test_successful_decryption_yaml(started_cluster): + check_node(started_cluster, node2) diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py new file mode 100644 index 00000000000..c6987d12324 --- /dev/null +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -0,0 +1,37 @@ +import 
pytest +from helpers.cluster import ClickHouseCluster + + +def start_clickhouse(config, err_msg): + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", main_configs=[config]) + caught_exception = "" + try: + cluster.start() + except Exception as e: + caught_exception = str(e) + assert err_msg in caught_exception + + +def test_wrong_method(): + start_clickhouse( + "configs/config_wrong_method.xml", "Unknown encryption method. Got WRONG" + ) + + +def test_invalid_chars(): + start_clickhouse( + "configs/config_invalid_chars.xml", + "Cannot read encrypted text, check for valid characters", + ) + + +def test_no_encryption_key(): + start_clickhouse( + "configs/config_no_encryption_key.xml", + "There is no key 0 in config for AES_128_GCM_SIV encryption codec", + ) + + +def test_subnodes(): + start_clickhouse("configs/config_subnodes.xml", "cannot contain nested elements") diff --git a/tests/integration/test_storage_postgresql_replica/__init__.py b/tests/integration/test_config_hide_in_preprocessed/__init__.py similarity index 100% rename from tests/integration/test_storage_postgresql_replica/__init__.py rename to tests/integration/test_config_hide_in_preprocessed/__init__.py diff --git a/tests/integration/test_config_hide_in_preprocessed/configs/config.xml b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml new file mode 100644 index 00000000000..aac5f572964 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml @@ -0,0 +1,12 @@ + + 2000 + 60000000000 + 40000000000 + + + value + value_2 + https://connection.url/ + + + diff --git a/tests/integration/test_config_hide_in_preprocessed/configs/users.xml b/tests/integration/test_config_hide_in_preprocessed/configs/users.xml new file mode 100644 index 00000000000..7f196179f80 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/configs/users.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_config_hide_in_preprocessed/test.py b/tests/integration/test_config_hide_in_preprocessed/test.py new file mode 100644 index 00000000000..fd237063b18 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/test.py @@ -0,0 +1,57 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", main_configs=["configs/config.xml"], user_configs=["configs/users.xml"] +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_hide_in_preprocessed(started_cluster): + assert ( + node.query( + "select value from system.server_settings where name ='max_thread_pool_free_size'" + ) + == "2000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_partition_size_to_drop'" + ) + == "40000000000\n" + ) + assert "key_1" in node.query("select collection from system.named_collections") + out = node.exec_in_container( + ["cat", "/var/lib/clickhouse/preprocessed_configs/config.xml"] + ) + assert ( + '2000' + not in out + ) + assert ( + '60000000000' + not in out + ) + assert ( + '40000000000' + in out + ) + assert '' not in out diff --git a/tests/integration/test_crash_log/configs/crash_log.xml b/tests/integration/test_crash_log/configs/crash_log.xml new file mode 100644 index 00000000000..f4fbfaba08e 
--- /dev/null +++ b/tests/integration/test_crash_log/configs/crash_log.xml @@ -0,0 +1,16 @@ + + + 1000000 + 1 + 1 + 1 + true + + + 1000000 + 100 + 100 + 100 + true + + diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py index 9f6eca794b1..a5b82039a84 100644 --- a/tests/integration/test_crash_log/test.py +++ b/tests/integration/test_crash_log/test.py @@ -12,7 +12,9 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) def started_node(): cluster = helpers.cluster.ClickHouseCluster(__file__) try: - node = cluster.add_instance("node", stay_alive=True) + node = cluster.add_instance( + "node", main_configs=["configs/crash_log.xml"], stay_alive=True + ) cluster.start() yield node @@ -28,7 +30,7 @@ def send_signal(started_node, signal): def wait_for_clickhouse_stop(started_node): result = None - for attempt in range(60): + for attempt in range(120): time.sleep(1) pid = started_node.get_process_pid("clickhouse") if pid is None: @@ -55,3 +57,18 @@ def test_pkill(started_node): started_node.query("SELECT COUNT(*) FROM system.crash_log") == f"{crashes_count}\n" ) + + +def test_pkill_query_log(started_node): + for signal in ["SEGV", "4"]: + # force create query_log if it was not created + started_node.query("SYSTEM FLUSH LOGS") + started_node.query("TRUNCATE TABLE IF EXISTS system.query_log") + started_node.query("SELECT COUNT(*) FROM system.query_log") + # logs don't flush + assert started_node.query("SELECT COUNT(*) FROM system.query_log") == f"{0}\n" + + send_signal(started_node, signal) + wait_for_clickhouse_stop(started_node) + started_node.restart_clickhouse() + assert started_node.query("SELECT COUNT(*) FROM system.query_log") >= f"3\n" diff --git a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index c7c30f5eea4..c8b75ea9751 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -24,6 +24,7 @@ node3 = cluster.add_instance( tag="20.3.16", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node4 = cluster.add_instance("node4") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py deleted file mode 100644 index 90287e19bd0..00000000000 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import math -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/33006 - -pytestmark = pytest.mark.skip - -from .common import * - -from helpers.cluster import ClickHouseCluster -from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout -from helpers.external_sources import SourceCassandra - -SOURCE = None -cluster = None -node = None -simple_tester = None -complex_tester = None -ranged_tester = None -test_name = "cassandra" - - -def setup_module(module): - global cluster - global node - global simple_tester - global complex_tester - global ranged_tester - - cluster = ClickHouseCluster(__file__) - - SOURCE = SourceCassandra( - "Cassandra", - None, - cluster.cassandra_port, - cluster.cassandra_host, - cluster.cassandra_port, - "", - "", - ) - - simple_tester = SimpleLayoutTester(test_name) - simple_tester.cleanup() - simple_tester.create_dictionaries(SOURCE) - - complex_tester = ComplexLayoutTester(test_name) - 
complex_tester.create_dictionaries(SOURCE) - - ranged_tester = RangedLayoutTester(test_name) - ranged_tester.create_dictionaries(SOURCE) - # Since that all .xml configs were created - - main_configs = [] - main_configs.append(os.path.join("configs", "disable_ssl_verification.xml")) - - dictionaries = simple_tester.list_dictionaries() - - node = cluster.add_instance( - "cass_node", - main_configs=main_configs, - dictionaries=dictionaries, - with_cassandra=True, - ) - - -def teardown_module(module): - simple_tester.cleanup() - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - simple_tester.prepare(cluster) - complex_tester.prepare(cluster) - ranged_tester.prepare(cluster) - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) -def test_simple(started_cluster, layout_name): - simple_tester.execute(layout_name, node) - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX)) -def test_complex(started_cluster, layout_name): - complex_tester.execute(layout_name, node) - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED)) -def test_ranged(started_cluster, layout_name): - ranged_tester.execute(layout_name, node) diff --git a/tests/integration/test_dictionaries_complex_key_cache_string/test.py b/tests/integration/test_dictionaries_complex_key_cache_string/test.py index ae9cd4e7c91..bca7122a2a8 100644 --- a/tests/integration/test_dictionaries_complex_key_cache_string/test.py +++ b/tests/integration/test_dictionaries_complex_key_cache_string/test.py @@ -27,71 +27,3 @@ def started_cluster(): yield cluster finally: cluster.shutdown() - - -@pytest.mark.skip(reason="SSD cache test can run on disk only") -@pytest.mark.parametrize("type", ["memory", "ssd"]) -def test_memory_consumption(started_cluster, type): - node = started_cluster.instances[f"node_{type}"] - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "w" * 8 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "x" * 16 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "y" * 32 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "z" * 64 - ) - ) - - # Fill dictionary - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated_first = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - - alloc_array = [] - for i in range(5): - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - alloc_array.append(allocated) - - # size doesn't grow - assert all(allocated_first >= a for a in alloc_array) - - for i in range(5): - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - alloc_array.append(allocated) - - # size doesn't grow - assert all(allocated_first >= a for a in alloc_array) diff --git a/tests/integration/test_dictionaries_update_and_reload/test.py 
b/tests/integration/test_dictionaries_update_and_reload/test.py index a973b697d0d..99d08f1b6ea 100644 --- a/tests/integration/test_dictionaries_update_and_reload/test.py +++ b/tests/integration/test_dictionaries_update_and_reload/test.py @@ -281,7 +281,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster): query_and_get_error = instance.query_and_get_error # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. - expected_error = "Table test.xypairs doesn't exist" + expected_error = "Table test.xypairs does not exist" update_error = "Could not update cache dictionary cache_xypairs now" assert expected_error in query_and_get_error( "SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py index 85c45d5df3c..921fb4e4154 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_default_reading(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py index 92d681698bc..6eedf63f95c 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py @@ -53,7 +53,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_return_real_values(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py index 05f638ec337..8e45af44640 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_simple_dict_get(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py index 1da8fd3325a..8d6242f4711 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_simple_dict_get_or_default(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_custom_settings/test.py b/tests/integration/test_dictionary_custom_settings/test.py index 715219ceb87..eb394da8bb6 100644 --- a/tests/integration/test_dictionary_custom_settings/test.py +++ b/tests/integration/test_dictionary_custom_settings/test.py @@ -2,6 +2,8 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from 
helpers.test_tools import assert_eq_with_retry + DICTIONARY_FILES = [ "configs/dictionaries/FileSourceConfig.xml", @@ -78,5 +80,11 @@ def test_work(start_cluster): assert caught_exception.find("Limit for result exceeded") != -1 - assert query("SELECT dictGetString('test_http', 'first', toUInt64(1))") == "\\'a\n" - assert query("SELECT dictGetString('test_http', 'second', toUInt64(1))") == '"b\n' + # It is possible that the HTTP server takes long time to start accepting connections + + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'first', toUInt64(1))", "\\'a\n" + ) + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'second', toUInt64(1))", '"b\n' + ) diff --git a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py index 509b4de1a37..b620e88e7eb 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py +++ b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py @@ -76,7 +76,7 @@ def test_introspection(): "mydb", "filtered_table1", "6068883a-0e9d-f802-7e22-0144f8e66d3c", - "users.xml", + "users_xml", "1", 0, 0, @@ -89,7 +89,7 @@ def test_introspection(): "mydb", "filtered_table2", "c019e957-c60b-d54e-cc52-7c90dac5fb01", - "users.xml", + "users_xml", "1", 0, 0, @@ -102,7 +102,7 @@ def test_introspection(): "mydb", "filtered_table3", "4cb080d0-44e8-dbef-6026-346655143628", - "users.xml", + "users_xml", "1", 0, 0, @@ -115,7 +115,7 @@ def test_introspection(): "mydb", "local", "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -128,7 +128,7 @@ def test_introspection(): "mydb", "filtered_table1", "9e8a8f62-4965-2b5e-8599-57c7b99b3549", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -141,7 +141,7 @@ def test_introspection(): "mydb", "filtered_table2", "cffae79d-b9bf-a2ef-b798-019c18470b25", - "users.xml", + "users_xml", "a + b < 1 or c - d > 5", 0, 0, @@ -154,7 +154,7 @@ def test_introspection(): "mydb", "filtered_table3", "12fc5cef-e3da-3940-ec79-d8be3911f42b", - "users.xml", + "users_xml", "c = 1", 0, 0, @@ -167,7 +167,7 @@ def test_introspection(): "mydb", "local", "cdacaeb5-1d97-f99d-2bb0-4574f290629c", - "users.xml", + "users_xml", "1", 0, 0, diff --git a/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py b/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py index 5d760c9fc2c..894464fd813 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py +++ b/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py @@ -43,7 +43,7 @@ def test_system_db(): assert node.query("SELECT count() FROM system.tables WHERE name='table2'") == "1\n" assert node.query("SELECT count()>0 FROM system.settings", user="another") == "1\n" - expected_error = "necessary to have grant SHOW USERS ON *.*" + expected_error = "necessary to have the grant SHOW USERS ON *.*" assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="another" ) @@ -62,7 +62,7 @@ def test_system_db(): ) assert node.query("SELECT count()>0 FROM system.settings", user="sqluser") == "1\n" - expected_error = "necessary to have grant SHOW USERS ON *.*" + expected_error = "necessary to have the grant SHOW USERS ON *.*" assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", 
user="sqluser" ) diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index d62adfb3343..2edc05c4d7e 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -13,6 +13,7 @@ def cluster(): "node1", main_configs=["configs/storage_conf.xml"], with_nginx=True, + allow_analyzer=False, ) cluster.add_instance( "node2", @@ -20,12 +21,14 @@ def cluster(): with_nginx=True, stay_alive=True, with_zookeeper=True, + allow_analyzer=False, ) cluster.add_instance( "node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True, with_zookeeper=True, + allow_analyzer=False, ) cluster.add_instance( @@ -36,6 +39,7 @@ def cluster(): with_installed_binary=True, image="clickhouse/clickhouse-server", tag="22.8.14.53", + allow_analyzer=False, ) cluster.start() @@ -143,7 +147,6 @@ def test_usage(cluster, node_name): def test_incorrect_usage(cluster): - node1 = cluster.instances["node1"] node2 = cluster.instances["node3"] global uuids node2.query( @@ -185,7 +188,7 @@ def test_cache(cluster, node_name): (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'cached_web'; """.format( - i, uuids[i], i, i + i, uuids[i] ) ) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 2428c53854e..97d5da787cd 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -114,9 +114,9 @@ def test_disks_app_func_cp(started_cluster): "/usr/bin/clickhouse", "disks", "copy", - "--diskFrom", + "--disk-from", "test1", - "--diskTo", + "--disk-to", "test2", ".", ".", diff --git a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py index cb51142d249..c48a7ad1fa1 100644 --- a/tests/integration/test_distributed_backward_compatability/test.py +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -11,6 +11,7 @@ node_old = cluster.add_instance( tag="20.8.9.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node_new = cluster.add_instance( "node2", diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py index faa38af6533..e66631460f7 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py @@ -68,7 +68,7 @@ def test_distributed_directory_monitor_split_batch_on_failure_OFF(started_cluste settings={ # max_memory_usage is the limit for the batch on the remote node # (local query should not be affected since 30MB is enough for 100K rows) - "max_memory_usage": "30Mi", + "max_memory_usage": "20Mi", "max_untracked_memory": "0", }, ) diff --git a/tests/integration/test_distributed_insert_backward_compatibility/test.py b/tests/integration/test_distributed_insert_backward_compatibility/test.py index ad61a2ad6f5..1e566d5e2da 100644 --- a/tests/integration/test_distributed_insert_backward_compatibility/test.py +++ b/tests/integration/test_distributed_insert_backward_compatibility/test.py @@ -14,6 +14,7 @@ node_dist = cluster.add_instance( tag="21.11.9.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py 
b/tests/integration/test_distributed_inter_server_secret/test.py index 6dd25789f36..1aeaddcf3c5 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -33,6 +33,7 @@ backward = make_instance( # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 tag="23.2.3", with_installed_binary=True, + allow_analyzer=False, ) users = pytest.mark.parametrize( @@ -109,10 +110,6 @@ def start_cluster(): cluster.shutdown() -def query_with_id(node, id_, query, **kwargs): - return node.query("WITH '{}' AS __id {}".format(id_, query), **kwargs) - - # @return -- [user, initial_user] def get_query_user_info(node, query_pattern): node.query("SYSTEM FLUSH LOGS") @@ -333,7 +330,7 @@ def test_secure_disagree_insert(): @users def test_user_insecure_cluster(user, password): id_ = "query-dist_insecure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_insecure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password) assert get_query_user_info(n1, id_) == [ user, user, @@ -344,7 +341,7 @@ def test_user_insecure_cluster(user, password): @users def test_user_secure_cluster(user, password): id_ = "query-dist_secure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_secure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(n2, id_) == [user, user] @@ -352,16 +349,14 @@ def test_user_secure_cluster(user, password): @users def test_per_user_inline_settings_insecure_cluster(user, password): id_ = "query-ddl-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_insecure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_insecure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -371,16 +366,14 @@ def test_per_user_inline_settings_insecure_cluster(user, password): @users def test_per_user_inline_settings_secure_cluster(user, password): id_ = "query-ddl-settings-dist_secure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_secure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_secure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -392,10 +385,8 @@ def test_per_user_inline_settings_secure_cluster(user, password): @users def test_per_user_protocol_settings_insecure_cluster(user, password): id_ = "query-protocol-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_insecure", + n1.query( + f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password, settings={ @@ -410,10 +401,8 @@ def test_per_user_protocol_settings_insecure_cluster(user, password): @users def test_per_user_protocol_settings_secure_cluster(user, password): id_ = "query-protocol-settings-dist_secure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_secure", + n1.query( + f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password, settings={ @@ -430,8 +419,8 @@ def test_per_user_protocol_settings_secure_cluster(user, password): @users def 
test_user_secure_cluster_with_backward(user, password): id_ = "with-backward-query-dist_secure-" + user - query_with_id( - n1, id_, "SELECT * FROM dist_secure_backward", user=user, password=password + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password ) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] @@ -440,13 +429,7 @@ def test_user_secure_cluster_with_backward(user, password): @users def test_user_secure_cluster_from_backward(user, password): id_ = "from-backward-query-dist_secure-" + user - query_with_id( - backward, - id_, - "SELECT * FROM dist_secure_backward", - user=user, - password=password, - ) + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 8d92d784226..61d52a1d9b1 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -104,7 +104,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) @@ -125,7 +125,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): SELECT count() FROM {exclusive_table}; """ ) - assert f"Table default.{exclusive_table} doesn't exist" in result + assert f"Table default.{exclusive_table} does not exist" in result else: assert 0 == int( node.query( @@ -173,7 +173,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) diff --git a/tests/integration/test_drop_replica/test.py b/tests/integration/test_drop_replica/test.py index 0941e664982..e0928c6ab08 100644 --- a/tests/integration/test_drop_replica/test.py +++ b/tests/integration/test_drop_replica/test.py @@ -159,11 +159,11 @@ def test_drop_replica(start_cluster): for i in range(1, 5): node_1_1.query("DETACH DATABASE test{}".format(i)) - assert "doesn't exist" in node_1_3.query_and_get_error( + assert "does not exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM TABLE test.test_table" ) - assert "doesn't exist" in node_1_3.query_and_get_error( + assert "does not exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM DATABASE test1" ) diff --git a/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml b/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml index d35cb173dd0..99efd99ab31 100644 --- a/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml +++ b/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml @@ -322,4 +322,76 @@ input_parameter.py {test_parameter:UInt64} + + executable + test_function_always_error_throw_python + String + + UInt64 + + TabSeparated + input_always_error.py + throw + + + + executable + test_function_always_error_log_python + String + + UInt64 + + TabSeparated + 
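Editorial aside on the test_distributed_inter_server_secret rewrite above: instead of a `WITH '<id>' AS __id` wrapper, each query now embeds its marker as a selected literal, so it can be located in system.query_log afterwards. A plausible sketch of the lookup side, assuming only standard query_log columns; the real get_query_user_info helper lives earlier in that file and is not shown in this hunk.

```
def get_query_user_info(node, query_pattern):
    # Flush in-memory logs, then find the finished query by its embedded marker.
    # The extra NOT LIKE guard keeps this lookup from matching itself.
    node.query("SYSTEM FLUSH LOGS")
    row = node.query(
        f"SELECT user, initial_user FROM system.query_log "
        f"WHERE query LIKE '%{query_pattern}%' "
        f"  AND query NOT LIKE '%system.query_log%' "
        f"  AND type = 'QueryFinish' "
        f"ORDER BY query_start_time_microseconds DESC LIMIT 1"
    )
    return row.strip().split("\t")
```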
input_always_error.py + log + + + + executable + test_function_always_error_log_first_python + String + + UInt64 + + TabSeparated + input_log_error.py + log_first + + + + executable + test_function_always_error_log_last_python + String + + UInt64 + + TabSeparated + input_log_error.py + log_last + + + + executable + test_function_exit_error_ignore_python + String + + UInt64 + + TabSeparated + input_exit_error.py + 0 + + + + executable + test_function_exit_error_fail_python + String + + UInt64 + + TabSeparated + input_exit_error.py + + + diff --git a/tests/integration/test_executable_user_defined_function/test.py b/tests/integration/test_executable_user_defined_function/test.py index 1f4e14470c3..ccc06a7b43a 100644 --- a/tests/integration/test_executable_user_defined_function/test.py +++ b/tests/integration/test_executable_user_defined_function/test.py @@ -1,6 +1,7 @@ import os import sys import time +import uuid import pytest @@ -285,3 +286,51 @@ def test_executable_function_parameter_python(started_cluster): node.query("SELECT test_function_parameter_python(2)(toUInt64(1))") == "Parameter 2 key 1\n" ) + + +def test_executable_function_always_error_python(started_cluster): + skip_test_msan(node) + try: + node.query("SELECT test_function_always_error_throw_python(1)") + assert False, "Exception have to be thrown" + except Exception as ex: + assert "DB::Exception: Executable generates stderr: Fake error" in str(ex) + + query_id = uuid.uuid4().hex + assert ( + node.query("SELECT test_function_always_error_log_python(1)", query_id=query_id) + == "Key 1\n" + ) + assert node.contains_in_log( + f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: Executable generates stderr: Fake error" + ) + + query_id = uuid.uuid4().hex + assert ( + node.query( + "SELECT test_function_always_error_log_first_python(1)", query_id=query_id + ) + == "Key 1\n" + ) + assert node.contains_in_log( + f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: Executable generates stderr at the beginning: {'a' * (3 * 1024)}{'b' * 1024}\n" + ) + + query_id = uuid.uuid4().hex + assert ( + node.query( + "SELECT test_function_always_error_log_last_python(1)", query_id=query_id + ) + == "Key 1\n" + ) + assert node.contains_in_log( + f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: Executable generates stderr at the end: {'b' * 1024}{'c' * (3 * 1024)}\n" + ) + + assert node.query("SELECT test_function_exit_error_ignore_python(1)") == "Key 1\n" + + try: + node.query("SELECT test_function_exit_error_fail_python(1)") + assert False, "Exception have to be thrown" + except Exception as ex: + assert "DB::Exception: Child process was exited with return code 1" in str(ex) diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_always_error.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_always_error.py new file mode 100755 index 00000000000..f50c1235e3c --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_always_error.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + print("Fake error", file=sys.stderr) + sys.stderr.flush() + for line in sys.stdin: + print("Key " + line, end="") + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_exit_error.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_exit_error.py new file mode 100755 index 00000000000..036d69a8297 --- /dev/null +++ 
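Editorial aside on the executable-UDF stderr tests added above: each query is tagged with a fresh query_id so the test can grep the server log for exactly the stderr line produced by that run. A short sketch of the pattern, reusing the UDF, helper calls, and log prefix from the hunk; the wrapper function name is hypothetical.

```
import uuid


def assert_stderr_is_logged(node):
    # Run the UDF under a known query_id, then look for the stderr line that
    # TimeoutReadBufferFromFileDescriptor logs under that id.
    query_id = uuid.uuid4().hex
    result = node.query(
        "SELECT test_function_always_error_log_python(1)", query_id=query_id
    )
    assert result == "Key 1\n"
    assert node.contains_in_log(
        f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: "
        f"Executable generates stderr: Fake error"
    )
```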
b/tests/integration/test_executable_user_defined_function/user_scripts/input_exit_error.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + for line in sys.stdin: + print("Key " + line, end="") + sys.stdout.flush() + + sys.exit(1) diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_log_error.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_log_error.py new file mode 100755 index 00000000000..d622f44f75d --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_log_error.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + print(f"{'a' * (3 * 1024)}{'b' * (3 * 1024)}{'c' * (3 * 1024)}", file=sys.stderr) + sys.stderr.flush() + for line in sys.stdin: + print("Key " + line, end="") + sys.stdout.flush() diff --git a/tests/integration/test_extreme_deduplication/test.py b/tests/integration/test_extreme_deduplication/test.py index 03451933171..3632369154a 100644 --- a/tests/integration/test_extreme_deduplication/test.py +++ b/tests/integration/test_extreme_deduplication/test.py @@ -71,73 +71,3 @@ def test_deduplication_window_in_seconds(started_cluster): assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3" node1.query("""DROP TABLE simple ON CLUSTER test_cluster""") - - -# Currently this test just reproduce incorrect behavior that sould be fixed -@pytest.mark.skip(reason="Flapping test") -def test_deduplication_works_in_case_of_intensive_inserts(started_cluster): - inserters = [] - fetchers = [] - - node1.query( - """ - CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id""" - ) - - node1.query("INSERT INTO simple VALUES (0, 0)") - - for node in nodes: - host = node.ip_address - - inserters.append( - CommandRequest( - ["/bin/bash"], - timeout=10, - stdin=""" -set -e -for i in `seq 1000`; do - {} --host {} -q "INSERT INTO simple VALUES (0, 0)" -done -""".format( - cluster.get_client_cmd(), host - ), - ) - ) - - fetchers.append( - CommandRequest( - ["/bin/bash"], - timeout=10, - stdin=""" -set -e -for i in `seq 1000`; do - res=`{} --host {} -q "SELECT count() FROM simple"` - if [[ $? -ne 0 || $res -ne 1 ]]; then - echo "Selected $res elements! 
Host: {}" 1>&2 - exit -1 - fi; -done -""".format( - cluster.get_client_cmd(), host, node.name - ), - ) - ) - - # There were not errors during INSERTs - for inserter in inserters: - try: - inserter.get_answer() - except QueryTimeoutExceedException: - # Only timeout is accepted - pass - - # There were not errors during SELECTs - for fetcher in fetchers: - try: - fetcher.get_answer() - except QueryTimeoutExceedException: - # Only timeout is accepted - pass - - node1.query("""DROP TABLE simple ON CLUSTER test_cluster""") diff --git a/tests/integration/test_file_schema_inference_cache/test.py b/tests/integration/test_file_schema_inference_cache/test.py index b8f6ac51186..dcbae3f2606 100755 --- a/tests/integration/test_file_schema_inference_cache/test.py +++ b/tests/integration/test_file_schema_inference_cache/test.py @@ -22,50 +22,39 @@ def start_cluster(): cluster.shutdown() -def get_profile_event_for_query(node, query, profile_event): +def check_profile_event_for_query(node, file, profile_event, amount=1): node.query("system flush logs") - query = query.replace("'", "\\'") - return int( - node.query( - f"select ProfileEvents['{profile_event}'] from system.query_log where query='{query}' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" + query_pattern = f"file('{file}'".replace("'", "\\'") + assert ( + int( + node.query( + f"select ProfileEvents['{profile_event}'] from system.query_log where query like '%{query_pattern}%' and query not like '%ProfileEvents%' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" + ) ) + == amount ) def check_cache_misses(node, file, amount=1): - assert ( - get_profile_event_for_query( - node, f"desc file('{file}')", "SchemaInferenceCacheMisses" - ) - == amount - ) + check_profile_event_for_query(node, file, "SchemaInferenceCacheMisses", amount) def check_cache_hits(node, file, amount=1): - assert ( - get_profile_event_for_query( - node, f"desc file('{file}')", "SchemaInferenceCacheHits" - ) - == amount - ) + check_profile_event_for_query(node, file, "SchemaInferenceCacheHits", amount) def check_cache_invalidations(node, file, amount=1): - assert ( - get_profile_event_for_query( - node, f"desc file('{file}')", "SchemaInferenceCacheInvalidations" - ) - == amount + check_profile_event_for_query( + node, file, "SchemaInferenceCacheInvalidations", amount ) def check_cache_evictions(node, file, amount=1): - assert ( - get_profile_event_for_query( - node, f"desc file('{file}')", "SchemaInferenceCacheEvictions" - ) - == amount - ) + check_profile_event_for_query(node, file, "SchemaInferenceCacheEvictions", amount) + + +def check_cache_num_rows_hits(node, file, amount=1): + check_profile_event_for_query(node, file, "SchemaInferenceCacheNumRowsHits", amount) def check_cache(node, expected_files): @@ -149,3 +138,62 @@ def test(start_cluster): node.query("desc file('data*.jsonl')") check_cache_misses(node, "data*.jsonl", 4) + + node.query("system drop schema cache") + check_cache(node, []) + + node.query("insert into function file('data.csv') select * from numbers(100)") + time.sleep(1) + + res = node.query("select count() from file('data.csv', auto, 'x UInt64')") + assert int(res) == 100 + check_cache(node, ["data.csv"]) + check_cache_misses(node, "data.csv") + + res = node.query("select count() from file('data.csv', auto, 'x UInt64')") + assert int(res) == 100 + check_cache_hits(node, "data.csv") + + node.query( + "insert into function file('data.csv', auto, 'x UInt64') select * from numbers(100)" + ) + time.sleep(1) + 
+ res = node.query("select count() from file('data.csv', auto, 'x UInt64')") + assert int(res) == 200 + check_cache_invalidations(node, "data.csv") + + node.query("insert into function file('data1.csv') select * from numbers(100)") + time.sleep(1) + + res = node.query("select count() from file('data1.csv', auto, 'x UInt64')") + assert int(res) == 100 + check_cache(node, ["data.csv", "data1.csv"]) + check_cache_misses(node, "data1.csv") + + res = node.query("select count() from file('data1.csv', auto, 'x UInt64')") + assert int(res) == 100 + check_cache_hits(node, "data1.csv") + + res = node.query("select count() from file('data*.csv', auto, 'x UInt64')") + assert int(res) == 300 + check_cache_hits(node, "data*.csv", 2) + + node.query("system drop schema cache for file") + check_cache(node, []) + + res = node.query("select count() from file('data*.csv', auto, 'x UInt64')") + assert int(res) == 300 + check_cache_misses(node, "data*.csv", 2) + + node.query("system drop schema cache for file") + check_cache(node, []) + + node.query("insert into function file('data.parquet') select * from numbers(100)") + time.sleep(1) + + res = node.query("select count() from file('data.parquet')") + assert int(res) == 100 + check_cache_misses(node, "data.parquet") + check_cache_hits(node, "data.parquet") + check_cache_num_rows_hits(node, "data.parquet") diff --git a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference b/tests/integration/test_filesystem_cache/__init__.py similarity index 100% rename from tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference rename to tests/integration/test_filesystem_cache/__init__.py diff --git a/tests/integration/test_filesystem_cache/config.d/storage_conf.xml b/tests/integration/test_filesystem_cache/config.d/storage_conf.xml new file mode 100644 index 00000000000..b614815b34f --- /dev/null +++ b/tests/integration/test_filesystem_cache/config.d/storage_conf.xml @@ -0,0 +1,10 @@ + + + + + local_blob_storage + / + + + + diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py new file mode 100644 index 00000000000..be7b12946a7 --- /dev/null +++ b/tests/integration/test_filesystem_cache/test.py @@ -0,0 +1,89 @@ +import logging +import time +import os + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_s3_mock, start_mock_servers +from helpers.utility import generate_values, replace_config, SafeThread + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=[ + "config.d/storage_conf.xml", + ], + stay_alive=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +@pytest.mark.parametrize("node_name", ["node"]) +def test_parallel_cache_loading_on_startup(cluster, node_name): + node = cluster.instances[node_name] + node.query( + """ + DROP TABLE IF EXISTS test SYNC; + + CREATE TABLE test (key UInt32, value String) + Engine=MergeTree() + ORDER BY value + SETTINGS disk = disk( + type = cache, + path = 'paralel_loading_test', + disk = 'hdd_blob', + max_file_segment_size = '1Ki', + boundary_alignemt = '1Ki', + max_size = '1Gi', + max_elements = 10000000, + load_metadata_threads = 30); + + SYSTEM DROP FILESYSTEM CACHE; + INSERT INTO test SELECT * FROM 
generateRandom('a Int32, b String') LIMIT 1000000; + SELECT * FROM test FORMAT Null; + """ + ) + assert int(node.query("SELECT count() FROM system.filesystem_cache")) > 0 + assert int(node.query("SELECT max(size) FROM system.filesystem_cache")) == 1024 + count = int(node.query("SELECT count() FROM test")) + + cache_count = int( + node.query("SELECT count() FROM system.filesystem_cache WHERE size > 0") + ) + cache_state = node.query( + "SELECT key, file_segment_range_begin, size FROM system.filesystem_cache WHERE size > 0 ORDER BY key, file_segment_range_begin, size" + ) + + node.restart_clickhouse() + + assert cache_count == int(node.query("SELECT count() FROM system.filesystem_cache")) + assert cache_state == node.query( + "SELECT key, file_segment_range_begin, size FROM system.filesystem_cache ORDER BY key, file_segment_range_begin, size" + ) + + assert node.contains_in_log("Loading filesystem cache with 30 threads") + assert int(node.query("SELECT count() FROM system.filesystem_cache")) > 0 + assert int(node.query("SELECT max(size) FROM system.filesystem_cache")) == 1024 + assert ( + int( + node.query( + "SELECT value FROM system.events WHERE event = 'FilesystemCacheLoadMetadataMicroseconds'" + ) + ) + > 0 + ) + node.query("SELECT * FROM test FORMAT Null") + assert count == int(node.query("SELECT count() FROM test")) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index ee5d4b5df93..35f4d81b66a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -188,7 +188,7 @@ def test_grant_all_on_table(): instance.query("SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") @@ -449,7 +449,7 @@ def test_introspection(): ] ) - expected_error = "necessary to have grant SHOW USERS" + expected_error = "necessary to have the grant SHOW USERS" assert expected_error in instance.query_and_get_error("SHOW GRANTS FOR B", user="A") expected_access1 = ( @@ -471,7 +471,7 @@ def test_introspection(): [ [ "A", - "local directory", + "local_directory", "no_password", "{}", "['::/0']", @@ -484,7 +484,7 @@ def test_introspection(): ], [ "B", - "local directory", + "local_directory", "no_password", "{}", "['::/0']", @@ -556,7 +556,7 @@ def test_grant_with_replace_option(): ) expected_error = ( - "it's necessary to have grant INSERT ON test.table WITH GRANT OPTION" + "it's necessary to have the grant INSERT ON test.table WITH GRANT OPTION" ) assert expected_error in instance.query_and_get_error( "GRANT INSERT ON test.table TO B WITH REPLACE OPTION", user="A" @@ -568,7 +568,7 @@ def test_grant_with_replace_option(): instance.query("GRANT INSERT ON test.table TO A WITH GRANT OPTION") expected_error = ( - "it's 
necessary to have grant SELECT ON test.table WITH GRANT OPTION" + "it's necessary to have the grant SELECT ON test.table WITH GRANT OPTION" ) assert expected_error in instance.query_and_get_error( "GRANT INSERT ON test.table TO B WITH REPLACE OPTION", user="A" diff --git a/tests/integration/test_graphite_merge_tree/test.py b/tests/integration/test_graphite_merge_tree/test.py index c4364a03fd9..7c19888df6b 100644 --- a/tests/integration/test_graphite_merge_tree/test.py +++ b/tests/integration/test_graphite_merge_tree/test.py @@ -509,6 +509,6 @@ CREATE TABLE test.graphite_not_created ) # The order of retentions is not guaranteed - assert "age and precision should only grow up: " in str(exc.value) + assert "Age and precision should only grow up: " in str(exc.value) assert "36000:600" in str(exc.value) assert "72000:300" in str(exc.value) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test.py b/tests/integration/test_groupBitmapAnd_on_distributed/test.py index 4dbc81236e7..8cf7e0fb2c1 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test.py @@ -5,13 +5,22 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node1", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node3", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -19,6 +28,7 @@ node4 = cluster.add_instance( image="yandex/clickhouse-server", tag="21.5", with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py b/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py index 774b7708587..115e6009801 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py @@ -5,13 +5,22 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node1", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node3", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -20,6 +29,7 @@ node4 = cluster.add_instance( tag="21.6", with_installed_binary=True, with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_group_array_element_size/__init__.py b/tests/integration/test_group_array_element_size/__init__.py new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml new file mode 100644 index 00000000000..7a9cda7ccbd --- /dev/null +++ b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml @@ -0,0 +1,3 @@ + + 10 + diff --git a/tests/integration/test_group_array_element_size/test.py b/tests/integration/test_group_array_element_size/test.py new file mode 100644 index 00000000000..86b1d5feeee --- /dev/null +++ b/tests/integration/test_group_array_element_size/test.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=["configs/group_array_max_element_size.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_max_exement_size(started_cluster): + node1.query( + "CREATE TABLE tab3 (x AggregateFunction(groupArray, Array(UInt8))) ENGINE = MergeTree ORDER BY tuple()" + ) + node1.query("insert into tab3 select groupArrayState([zero]) from zeros(10)") + assert node1.query("select length(groupArrayMerge(x)) from tab3") == "10\n" + + # First query should always fail + with pytest.raises(Exception, match=r"Too large array size"): + node1.query("insert into tab3 select groupArrayState([zero]) from zeros(11)") + + node1.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "10", + "11", + ) + + node1.restart_clickhouse() + + node1.query("insert into tab3 select groupArrayState([zero]) from zeros(11)") + assert node1.query("select length(groupArrayMerge(x)) from tab3") == "21\n" + + node1.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "11", + "10", + ) + + node1.restart_clickhouse() + + with pytest.raises(Exception, match=r"Too large array size"): + node1.query("select length(groupArrayMerge(x)) from tab3") + + node1.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "10", + "11", + ) + + node1.restart_clickhouse() + + assert node1.query("select length(groupArrayMerge(x)) from tab3") == "21\n" diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index dead4d447ec..efc7d98e820 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -352,9 +352,13 @@ def test_authentication(): def test_logs(): - logs = query_and_get_logs("SELECT 1", settings={"send_logs_level": "debug"}) - assert "SELECT 1" in logs - assert "Read 1 rows" in logs + query = "SELECT has(groupArray(number), 42) FROM numbers(1000000) SETTINGS max_block_size=100000" + logs = query_and_get_logs( + query, + settings={"send_logs_level": "debug"}, + ) + assert query in logs + assert "Read 1000000 rows" in logs assert "Peak memory usage" in logs diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 18ea3e50619..6f5d5706da4 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -203,9 +203,6 @@ def update_configs( def test_stuck_replica(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work 
under Thread Sanitizer") - update_configs() cluster.pause_container("node_1") @@ -236,16 +233,13 @@ def test_stuck_replica(started_cluster): def test_long_query(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs() # Restart to reset pool states. NODES["node"].restart_clickhouse() result = NODES["node"].query( - "select hostName(), max(id + sleep(1.5)) from distributed settings max_block_size = 1, max_threads = 1;" + "select hostName(), max(id + sleep(1.5)) from distributed settings max_block_size = 1, max_threads = 1, max_distributed_connections = 1;" ) assert TSV(result) == TSV("node_1\t99") @@ -255,18 +249,12 @@ def test_long_query(started_cluster): def test_send_table_status_sleep(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs(node_1_sleep_in_send_tables_status=sleep_time) check_query(expected_replica="node_2") check_changing_replica_events(1) def test_send_table_status_sleep2(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time, @@ -276,18 +264,12 @@ def test_send_table_status_sleep2(started_cluster): def test_send_data(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs(node_1_sleep_in_send_data=sleep_time) check_query(expected_replica="node_2") check_changing_replica_events(1) def test_send_data2(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time ) @@ -296,9 +278,6 @@ def test_send_data2(started_cluster): def test_combination1(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_data=sleep_time, @@ -308,9 +287,6 @@ def test_combination1(started_cluster): def test_combination2(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_tables_status=sleep_time, @@ -320,9 +296,6 @@ def test_combination2(started_cluster): def test_combination3(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_tables_status=1000, @@ -333,9 +306,6 @@ def test_combination3(started_cluster): def test_combination4(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_in_send_tables_status=1000, node_1_sleep_in_send_data=sleep_time, @@ -347,9 +317,6 @@ def test_combination4(started_cluster): def test_receive_timeout1(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - # Check the situation when first two replicas get receive timeout # in establishing 
connection, but the third replica is ok. update_configs( @@ -362,9 +329,6 @@ def test_receive_timeout1(started_cluster): def test_receive_timeout2(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - # Check the situation when first replica get receive timeout # in packet receiving but there are replicas in process of # connection establishing. @@ -378,9 +342,6 @@ def test_receive_timeout2(started_cluster): def test_initial_receive_timeout(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - # Check the situation when replicas don't respond after # receiving query (so, no packets were send to initiator) update_configs( @@ -399,9 +360,6 @@ def test_initial_receive_timeout(started_cluster): def test_async_connect(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs() NODES["node"].restart_clickhouse() @@ -414,7 +372,7 @@ def test_async_connect(started_cluster): ) NODES["node"].query( - "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=0, max_threads=1" + "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=0, max_threads=1, max_distributed_connections=1" ) check_changing_replica_events(2) check_if_query_sending_was_not_suspended() @@ -423,7 +381,7 @@ def test_async_connect(started_cluster): NODES["node"].restart_clickhouse() NODES["node"].query( - "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=1, max_threads=1" + "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=1, max_threads=1, max_distributed_connections=1" ) check_changing_replica_events(2) check_if_query_sending_was_suspended() @@ -432,9 +390,6 @@ def test_async_connect(started_cluster): def test_async_query_sending(started_cluster): - if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip("Hedged requests don't work under Thread Sanitizer") - update_configs( node_1_sleep_after_receiving_query=5000, node_2_sleep_after_receiving_query=5000, @@ -459,13 +414,13 @@ def test_async_query_sending(started_cluster): NODES["node"].query( "SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS" - " prefer_localhost_replica = 0, async_query_sending_for_remote=0, max_threads = 1" + " prefer_localhost_replica = 0, async_query_sending_for_remote=0, max_threads = 1, max_distributed_connections=1" ) check_if_query_sending_was_not_suspended() NODES["node"].query( "SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS" - " prefer_localhost_replica = 0, async_query_sending_for_remote=1, max_threads = 1" + " prefer_localhost_replica = 0, async_query_sending_for_remote=1, max_threads = 1, max_distributed_connections=1" ) check_if_query_sending_was_suspended() diff --git a/tests/integration/test_hive_query/configs/config.xml b/tests/integration/test_hive_query/configs/config.xml deleted file mode 100644 index 
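Editorial aside on the hedged-request test changes above: the Thread Sanitizer skips are dropped, and the queries now pin max_distributed_connections=1 alongside max_threads=1, presumably so that only one remote connection is active at a time and the replica-switch accounting stays deterministic. A sketch of the same long-query check with those settings passed through the settings= dict that node.query() accepts elsewhere in these tests; NODES and TSV are assumed from the surrounding test module.

```
# Equivalent to the updated test_long_query above, with settings as a dict.
result = NODES["node"].query(
    "select hostName(), max(id + sleep(1.5)) from distributed",
    settings={
        "max_block_size": "1",
        "max_threads": "1",
        "max_distributed_connections": "1",
    },
)
assert TSV(result) == TSV("node_1\t99")
```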
7de1391e56c..00000000000 --- a/tests/integration/test_hive_query/configs/config.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - localhost - 9000 - - - - - - - true - /tmp/clickhouse_local_cache,/tmp/clickhouse_local_cache1 - 207374182400 - 1048576 - - - - /etc/clickhouse-server/extra_conf.d/hdfs-site.xml - - - - system - metric_log
- 1000 - 1000 -
- -
diff --git a/tests/integration/test_hive_query/configs/hdfs-site.xml b/tests/integration/test_hive_query/configs/hdfs-site.xml deleted file mode 100644 index 82c525ea414..00000000000 --- a/tests/integration/test_hive_query/configs/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - dfs.replication - 1 - - diff --git a/tests/integration/test_hive_query/data/prepare_hive_data.sh b/tests/integration/test_hive_query/data/prepare_hive_data.sh deleted file mode 100755 index 495ea201870..00000000000 --- a/tests/integration/test_hive_query/data/prepare_hive_data.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -hive -e "create database test" - -hive -e "drop table if exists test.demo; create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " -hive -e "drop table if exists test.parquet_demo; create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" -hive -e "drop table if exists test.demo_text; create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" - -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" - -hive -e "drop table if exists test.test_hive_types; CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" - -hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', 
float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 10, 'y', 'xyz')); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 11, 'y', 'abc'));" - diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py deleted file mode 100644 index 791ae03f9f6..00000000000 --- a/tests/integration/test_hive_query/test.py +++ /dev/null @@ -1,498 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/43541 - -pytestmark = pytest.mark.skip - -import logging -import os - -import time -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -logging.getLogger().setLevel(logging.INFO) -logging.getLogger().addHandler(logging.StreamHandler()) - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "h0_0_0", - main_configs=["configs/config.xml"], - extra_configs=["configs/hdfs-site.xml", "data/prepare_hive_data.sh"], - with_hive=True, - ) - - logging.info("Starting cluster ...") - cluster.start() - cluster.copy_file_to_container( - "roottesthivequery_hdfs1_1", - "/ClickHouse/tests/integration/test_hive_query/data/prepare_hive_data.sh", - "/prepare_hive_data.sh", - ) - cluster.exec_in_container( - "roottesthivequery_hdfs1_1", ["bash", "-c", "bash /prepare_hive_data.sh"] - ) - yield cluster - finally: - cluster.shutdown() - - -def test_create_parquet_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - test_passed = False - for i in range(10): - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query( - """ -DROP TABLE IF EXISTS default.demo_parquet; -CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - assert test_passed - - -def test_create_parquet_table_1(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - for i in range(10): - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query( - """ -DROP TABLE IF EXISTS default.demo_parquet_parts; -CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - assert test_passed - - -def test_create_orc_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - test_passed = False - for i in range(10): - result = node.query( - """ - DROP 
TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day) - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - - assert test_passed - - -def test_create_text_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_text; - CREATE TABLE default.demo_text (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_text') PARTITION BY (tuple()) - """ - ) - logging.info("create result {}".format(result)) - - assert result.strip() == "" - - -def test_parquet_groupby(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_in_filter(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') - """ - ) - expected_result = """2 -""" - logging.info("query result:{}".format(result)) - assert result == expected_result - - -def test_orc_groupby(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day); - SELECT day, count(*) FROM default.demo_orc group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -@pytest.mark.parametrize( - "table,use_local_cache_for_remote_storage,enable_orc_file_minmax_index,enable_orc_stripe_minmax_index", - [ - pytest.param( - "demo_orc_no_cache_no_index", - "false", - "false", - "false", - id="demo_orc_no_cache_no_index", - ), - pytest.param( - "demo_orc_with_cache_no_index", - "true", - "false", - "false", - id="demo_orc_with_cache_no_index", - ), - pytest.param( - "demo_orc_no_cache_file_index", - "false", - "true", - "false", - id="demo_orc_no_cache_file_index", - ), - pytest.param( - "demo_orc_with_cache_file_index", - "true", - "true", - "false", - id="demo_orc_with_cache_file_index", - ), - pytest.param( - "demo_orc_no_cache_stripe_index", - "false", - "true", - "true", - id="demo_orc_no_cache_stripe_index", - ), - pytest.param( - "demo_orc_with_cache_stripe_index", - "true", - "true", - "true", - id="demo_orc_with_cache_stripe_index", - ), - ], -) -def test_orc_minmax_index( - started_cluster, - table, - use_local_cache_for_remote_storage, - enable_orc_file_minmax_index, - enable_orc_stripe_minmax_index, -): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.{table}; - CREATE TABLE default.{table} (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 
'demo_orc') PARTITION BY(day) - SETTINGS enable_orc_file_minmax_index = {enable_orc_file_minmax_index}, enable_orc_stripe_minmax_index = {enable_orc_stripe_minmax_index}; - """.format( - table=table, - enable_orc_file_minmax_index=enable_orc_file_minmax_index, - enable_orc_stripe_minmax_index=enable_orc_stripe_minmax_index, - ) - ) - assert result.strip() == "" - - for i in range(2): - result = node.query( - """ - SELECT day, id, score FROM default.{table} where day >= '2021-11-05' and day <= '2021-11-16' and score >= 15 and score <= 30 order by day, id - SETTINGS use_local_cache_for_remote_storage = {use_local_cache_for_remote_storage} - """.format( - table=table, - use_local_cache_for_remote_storage=use_local_cache_for_remote_storage, - ) - ) - - assert ( - result - == """2021-11-05 abd 15 -2021-11-16 aaa 22 -""" - ) - - -@pytest.mark.parametrize( - "table,use_local_cache_for_remote_storage,enable_parquet_rowgroup_minmax_index", - [ - pytest.param( - "demo_parquet_no_cache_no_index", - "false", - "false", - id="demo_parquet_no_cache_no_index", - ), - pytest.param( - "demo_parquet_with_cache_no_index", - "true", - "false", - id="demo_parquet_with_cache_no_index", - ), - pytest.param( - "demo_parquet_no_cache_rowgroup_index", - "false", - "true", - id="demo_parquet_no_cache_rowgroup_index", - ), - pytest.param( - "demo_parquet_with_cache_rowgroup_index", - "true", - "true", - id="demo_parquet_with_cache_rowgroup_index", - ), - ], -) -def test_parquet_minmax_index( - started_cluster, - table, - use_local_cache_for_remote_storage, - enable_parquet_rowgroup_minmax_index, -): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.{table}; - CREATE TABLE default.{table} (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - SETTINGS enable_parquet_rowgroup_minmax_index = {enable_parquet_rowgroup_minmax_index} - """.format( - table=table, - enable_parquet_rowgroup_minmax_index=enable_parquet_rowgroup_minmax_index, - ) - ) - assert result.strip() == "" - - for i in range(2): - result = node.query( - """ - SELECT day, id, score FROM default.{table} where day >= '2021-11-05' and day <= '2021-11-16' and score >= 15 and score <= 30 order by day, id - SETTINGS use_local_cache_for_remote_storage = {use_local_cache_for_remote_storage} - """.format( - table=table, - use_local_cache_for_remote_storage=use_local_cache_for_remote_storage, - ) - ) - - assert ( - result - == """2021-11-05 abd 15 -2021-11-16 aaa 22 -""" - ) - - -def test_hive_columns_prunning(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' - """ - ) - expected_result = """4 -""" - logging.info("query result:{}".format(result)) - assert result == expected_result - - -def test_text_count(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day); - SELECT day, count(*) FROM default.demo_orc group by day order by day SETTINGS format_csv_delimiter = '\x01' - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def 
test_parquet_groupby_with_cache(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_groupby_by_hive_function(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_cache_read_bytes(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - test_passed = False - for i in range(10): - result = node.query( - """ - SELECT * FROM default.demo_parquet_1 settings input_format_parquet_allow_missing_columns = true - """ - ) - node.query("system flush logs") - result = node.query( - "select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0" - ) - if result.strip() == "0": - logging.info("ProfileEvent_ExternalDataSourceLocalCacheReadBytes == 0") - time.sleep(10) - continue - test_passed = True - break - assert test_passed - - -def test_cache_dir_use(started_cluster): - node = started_cluster.instances["h0_0_0"] - result0 = node.exec_in_container( - ["bash", "-c", "ls /tmp/clickhouse_local_cache | wc -l"] - ) - result1 = node.exec_in_container( - ["bash", "-c", "ls /tmp/clickhouse_local_cache1 | wc -l"] - ) - assert result0 != "0" and result1 != "0" - - -def test_hive_struct_type(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - CREATE TABLE IF NOT EXISTS default.test_hive_types (`f_tinyint` Int8, `f_smallint` Int16, `f_int` Int32, `f_integer` Int32, `f_bigint` Int64, `f_float` Float32, `f_double` Float64, `f_decimal` Float64, `f_timestamp` DateTime, `f_date` Date, `f_string` String, `f_varchar` String, `f_char` String, `f_bool` Boolean, `f_array_int` Array(Int32), `f_array_string` Array(String), `f_array_float` Array(Float32), `f_map_int` Map(String, Int32), `f_map_string` Map(String, String), `f_map_float` Map(String, Float32), `f_struct` Tuple(a String, b Int32, c Float32, d Tuple(x Int32, y String)), `day` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'test_hive_types') PARTITION BY (day) - """ - ) - result = node.query( - """ - SELECT * FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 - """ - ) - expected_result = """1 2 3 4 5 6.11 7.22 8 2022-02-20 14:47:04 2022-02-20 hello world hello world hello world true [1,2,3] ['hello world','hello world'] [1.1,1.2] {'a':100,'b':200,'c':300} {'a':'aa','b':'bb','c':'cc'} {'a':111.1,'b':222.2,'c':333.3} ('aaa',200,333.3,(10,'xyz')) 2022-02-20""" - assert result.strip() == expected_result - - result = node.query( - """ - SELECT day, f_struct.a, f_struct.d.x FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS 
input_format_parquet_import_nested=1 - """ - ) - expected_result = """2022-02-20 aaa 10""" - - -def test_table_alter_add(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - result = node.query( - """ -ALTER TABLE default.demo_parquet_1 ADD COLUMN id Nullable(String) FIRST - """ - ) - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - - expected_result = "id\tNullable(String)\t\t\t\t\t\nscore\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)" - assert result.strip() == expected_result - - -def test_table_alter_drop(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - result = node.query( - """ -ALTER TABLE default.demo_parquet_1 DROP COLUMN id - """ - ) - - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - expected_result = """score\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)""" - assert result.strip() == expected_result - - -def test_table_alter_comment(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - - result = node.query( - """ALTER TABLE default.demo_parquet_1 COMMENT COLUMN id 'Text comment'""" - ) - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - expected_result = """id\tNullable(String)\t\t\tText comment\t\t\nscore\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)""" - assert result.strip() == expected_result diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml index 7a2141e6c7e..6ca61fb3c44 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com index 9beb415c290..6c6e4cbee2e 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com @@ -1 +1 @@ 
-filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file +filled in runtime, but needs to exist in order to be volume mapped in docker diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml index 7a2141e6c7e..9329c8dbde2 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py index fe69d72c1c7..70419f95dd3 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py @@ -9,7 +9,7 @@ server_ip = sys.argv[2] mutex = threading.Lock() success_counter = 0 number_of_threads = 100 -number_of_iterations = 100 +number_of_iterations = 50 def perform_request(): diff --git a/tests/integration/test_insert_into_distributed/test.py b/tests/integration/test_insert_into_distributed/test.py index 3bee2149387..09a14c11727 100644 --- a/tests/integration/test_insert_into_distributed/test.py +++ b/tests/integration/test_insert_into_distributed/test.py @@ -246,7 +246,7 @@ def test_inserts_local(started_cluster): def test_inserts_single_replica_local_internal_replication(started_cluster): with pytest.raises( - QueryRuntimeException, match="Table default.single_replicated doesn't exist" + QueryRuntimeException, match="Table default.single_replicated does not exist" ): node1.query( "INSERT INTO distributed_one_replica_internal_replication VALUES ('2000-01-01', 1)", @@ -279,7 +279,8 @@ def test_inserts_single_replica_internal_replication(started_cluster): def test_inserts_single_replica_no_internal_replication(started_cluster): try: with pytest.raises( - QueryRuntimeException, match="Table default.single_replicated doesn't exist" + QueryRuntimeException, + match="Table default.single_replicated does not exist", ): node1.query( "INSERT INTO distributed_one_replica_no_internal_replication VALUES ('2000-01-01', 1)", diff --git a/tests/integration/test_insert_into_distributed_through_materialized_view/test.py b/tests/integration/test_insert_into_distributed_through_materialized_view/test.py index a5f92002450..b1eb0df2d43 100644 --- a/tests/integration/test_insert_into_distributed_through_materialized_view/test.py +++ b/tests/integration/test_insert_into_distributed_through_materialized_view/test.py @@ -108,74 +108,6 @@ def test_reconnect(started_cluster): assert remote.query("SELECT count(*) FROM local1").strip() == "3" -@pytest.mark.skip(reason="Flapping test") -def test_inserts_batching(started_cluster): - instance = 
instance_test_inserts_batching - - with PartitionManager() as pm: - pm.partition_instances(instance, remote) - - instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 1)") - # Sleep a bit so that this INSERT forms a batch of its own. - time.sleep(0.2) - - instance.query("INSERT INTO local2_source(x, d) VALUES (2, '2000-01-01')") - - for i in range(3, 7): - instance.query( - "INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i) - ) - - for i in range(7, 9): - instance.query( - "INSERT INTO local2_source(x, d) VALUES ({}, '2000-01-01')".format(i) - ) - - instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 9)") - - # After ALTER the structure of the saved blocks will be different - instance.query("DROP TABLE local2_view") - instance.query("ALTER TABLE distributed ADD COLUMN s String") - - # Memory Engine doesn't support ALTER so we just DROP/CREATE everything - instance.query("DROP TABLE local2_source") - instance.query( - "CREATE TABLE local2_source (d Date, x UInt32, s String) ENGINE = Memory" - ) - instance.query( - "CREATE MATERIALIZED VIEW local2_view to distributed AS SELECT d,x,s FROM local2_source" - ) - - for i in range(10, 13): - instance.query( - "INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i) - ) - - time.sleep(1.0) - - result = remote.query( - "SELECT _part, groupArray(x) FROM local2 GROUP BY _part ORDER BY _part" - ) - - # Explanation: as merges are turned off on remote instance, active parts in local2 table correspond 1-to-1 - # to inserted blocks. - # Batches of max 3 rows are formed as min_insert_block_size_rows = 3. - # Blocks: - # 1. Failed batch that is retried with the same contents. - # 2. Full batch of inserts regardless of the order of columns thanks to the view. - # 3. Full batch of inserts regardless order of columns thanks to the view. - # 4. Full batch of inserts after ALTER (that have different block structure). - # 5. What was left to insert before ALTER. 
- expected = """\ -20000101_20000101_1_1_0 [1] -20000101_20000101_2_2_0 [2,3,4] -20000101_20000101_3_3_0 [5,6,7] -20000101_20000101_4_4_0 [10,11,12] -20000101_20000101_5_5_0 [8,9] -""" - assert TSV(result) == TSV(expected) - - def test_inserts_local(started_cluster): instance = instance_test_inserts_local_cluster instance.query("INSERT INTO local_source VALUES ('2000-01-01', 1)") diff --git a/tests/integration/test_kafka_bad_messages/test.py b/tests/integration/test_kafka_bad_messages/test.py index da3cf36c853..1633f230f83 100644 --- a/tests/integration/test_kafka_bad_messages/test.py +++ b/tests/integration/test_kafka_bad_messages/test.py @@ -90,7 +90,9 @@ def producer_serializer(x): return x.encode() if isinstance(x, str) else x -def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): +def kafka_produce( + kafka_cluster, topic, messages, timestamp=None, retries=15, partition=None +): logging.debug( "kafka_produce server:{}:{} topic:{}".format( "localhost", kafka_cluster.kafka_port, topic @@ -100,7 +102,9 @@ def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): kafka_cluster.kafka_port, producer_serializer, retries ) for message in messages: - producer.send(topic=topic, value=message, timestamp_ms=timestamp) + producer.send( + topic=topic, value=message, timestamp_ms=timestamp, partition=partition + ) producer.flush() @@ -115,7 +119,7 @@ def kafka_cluster(): cluster.shutdown() -def test_bad_messages_parsing(kafka_cluster): +def test_bad_messages_parsing_stream(kafka_cluster): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -244,7 +248,7 @@ struct Message f""" DROP TABLE IF EXISTS view; DROP TABLE IF EXISTS kafka; - + CREATE TABLE kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', @@ -253,9 +257,9 @@ struct Message kafka_format = 'CapnProto', kafka_handle_error_mode='stream', kafka_schema='schema_test_errors:Message'; - + CREATE MATERIALIZED VIEW view Engine=Log AS - SELECT _error FROM kafka WHERE length(_error) != 0 ; + SELECT _error FROM kafka WHERE length(_error) != 0; """ ) @@ -279,6 +283,112 @@ struct Message kafka_delete_topic(admin_client, "CapnProto_err") +def test_bad_messages_parsing_exception(kafka_cluster, max_retries=20): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + for format_name in [ + "Avro", + "JSONEachRow", + ]: + print(format_name) + + kafka_create_topic(admin_client, f"{format_name}_err") + + instance.query( + f""" + DROP TABLE IF EXISTS view_{format_name}; + DROP TABLE IF EXISTS kafka_{format_name}; + DROP TABLE IF EXISTS kafka; + + CREATE TABLE kafka_{format_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}_err', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_num_consumers = 1; + + CREATE MATERIALIZED VIEW view_{format_name} Engine=Log AS + SELECT * FROM kafka_{format_name}; + """ + ) + + kafka_produce( + kafka_cluster, f"{format_name}_err", ["qwertyuiop", "asdfghjkl", "zxcvbnm"] + ) + + expected_result = """avro::Exception: Invalid data file. 
Magic does not match: : while parsing Kafka message (topic: Avro_err, partition: 0, offset: 0)\\'|1|1|1|default|kafka_Avro +Cannot parse input: expected \\'{\\' before: \\'qwertyuiop\\': while parsing Kafka message (topic: JSONEachRow_err, partition: 0, offset: 0|1|1|1|default|kafka_JSONEachRow +""" + # filter out stacktrace in exceptions.text[1] because it is hardly stable enough + result_system_kafka_consumers = instance.query_with_retry( + """ + SELECT substr(exceptions.text[1], 1, 131), length(exceptions.text) > 1 AND length(exceptions.text) < 15, length(exceptions.time) > 1 AND length(exceptions.time) < 15, abs(dateDiff('second', exceptions.time[1], now())) < 40, database, table FROM system.kafka_consumers WHERE table in('kafka_Avro', 'kafka_JSONEachRow') ORDER BY table, assignments.partition_id[1] + """, + retry_count=max_retries, + sleep_time=1, + check_callback=lambda res: res.replace("\t", "|") == expected_result, + ) + + assert result_system_kafka_consumers.replace("\t", "|") == expected_result + + for format_name in [ + "Avro", + "JSONEachRow", + ]: + kafka_delete_topic(admin_client, f"{format_name}_err") + + +def test_bad_messages_to_mv(kafka_cluster, max_retries=20): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + kafka_create_topic(admin_client, "tomv") + + instance.query( + f""" + DROP TABLE IF EXISTS kafka_materialized; + DROP TABLE IF EXISTS kafka_consumer; + DROP TABLE IF EXISTS kafka1; + + CREATE TABLE kafka1 (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'tomv', + kafka_group_name = 'tomv', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 1; + + CREATE TABLE kafka_materialized(`key` UInt64, `value` UInt64) ENGINE = Log; + + CREATE MATERIALIZED VIEW kafka_consumer TO kafka_materialized + (`key` UInt64, `value` UInt64) AS + SELECT key, CAST(value, 'UInt64') AS value + FROM kafka1; + """ + ) + + kafka_produce(kafka_cluster, "tomv", ['{"key":10, "value":"aaa"}']) + + expected_result = """Code: 6. DB::Exception: Cannot parse string \\'aaa\\' as UInt64: syntax error at begin of string. 
Note: there are toUInt64OrZero and to|1|1|1|default|kafka1 +""" + result_system_kafka_consumers = instance.query_with_retry( + """ + SELECT substr(exceptions.text[1], 1, 131), length(exceptions.text) > 1 AND length(exceptions.text) < 15, length(exceptions.time) > 1 AND length(exceptions.time) < 15, abs(dateDiff('second', exceptions.time[1], now())) < 40, database, table FROM system.kafka_consumers WHERE table='kafka1' ORDER BY table, assignments.partition_id[1] + """, + retry_count=max_retries, + sleep_time=1, + check_callback=lambda res: res.replace("\t", "|") == expected_result, + ) + + assert result_system_kafka_consumers.replace("\t", "|") == expected_result + + kafka_delete_topic(admin_client, "tomv") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 00c7908eeed..c82917372b8 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -1,6 +1,7 @@ import pytest -from helpers.client import CommandRequest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.keeper_utils import KeeperClient cluster = ClickHouseCluster(__file__) @@ -13,7 +14,7 @@ node = cluster.add_instance( ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def started_cluster(): try: cluster.start() @@ -23,41 +24,108 @@ def started_cluster(): cluster.shutdown() -def test_base_commands(started_cluster): - _ = started_cluster +@pytest.fixture(scope="function") +def client(started_cluster): + with KeeperClient.from_cluster(cluster, "zoo1") as keeper_client: + yield keeper_client - command = CommandRequest( + +def test_big_family(client: KeeperClient): + client.touch("/test_big_family") + client.touch("/test_big_family/1") + client.touch("/test_big_family/1/1") + client.touch("/test_big_family/1/2") + client.touch("/test_big_family/1/3") + client.touch("/test_big_family/1/4") + client.touch("/test_big_family/1/5") + client.touch("/test_big_family/2") + client.touch("/test_big_family/2/1") + client.touch("/test_big_family/2/2") + client.touch("/test_big_family/2/3") + + response = client.find_big_family("/test_big_family") + + assert response == TSV( [ - started_cluster.server_bin_path, - "keeper-client", - "--host", - str(cluster.get_instance_ip("zoo1")), - "--port", - str(cluster.zookeeper_port), - "-q", - "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;", - ], - stdin="", + ["/test_big_family/1", "5"], + ["/test_big_family/2", "3"], + ["/test_big_family/2/3", "0"], + ["/test_big_family/2/2", "0"], + ["/test_big_family/2/1", "0"], + ["/test_big_family/1/5", "0"], + ["/test_big_family/1/4", "0"], + ["/test_big_family/1/3", "0"], + ["/test_big_family/1/2", "0"], + ["/test_big_family/1/1", "0"], + ] ) - assert command.get_answer() == "testvalue1\n" + response = client.find_big_family("/test_big_family", 1) - -def test_four_letter_word_commands(started_cluster): - _ = started_cluster - - command = CommandRequest( + assert response == TSV( [ - started_cluster.server_bin_path, - "keeper-client", - "--host", - str(cluster.get_instance_ip("zoo1")), - "--port", - str(cluster.zookeeper_port), - "-q", - "ruok", - ], - stdin="", + ["/test_big_family/1", "5"], + ] ) - assert command.get_answer() == "imok\n" + +def test_find_super_nodes(client: KeeperClient): + client.touch("/test_find_super_nodes") 
+ client.touch("/test_find_super_nodes/1") + client.touch("/test_find_super_nodes/1/1") + client.touch("/test_find_super_nodes/1/2") + client.touch("/test_find_super_nodes/1/3") + client.touch("/test_find_super_nodes/1/4") + client.touch("/test_find_super_nodes/1/5") + client.touch("/test_find_super_nodes/2") + client.touch("/test_find_super_nodes/2/1") + client.touch("/test_find_super_nodes/2/2") + client.touch("/test_find_super_nodes/2/3") + client.touch("/test_find_super_nodes/2/4") + + client.cd("/test_find_super_nodes") + + response = client.find_super_nodes(4) + assert response == TSV( + [ + ["/test_find_super_nodes/1", "5"], + ["/test_find_super_nodes/2", "4"], + ] + ) + + +def test_delete_stale_backups(client: KeeperClient): + client.touch("/clickhouse") + client.touch("/clickhouse/backups") + client.touch("/clickhouse/backups/1") + client.touch("/clickhouse/backups/1/stage") + client.touch("/clickhouse/backups/1/stage/alive123") + client.touch("/clickhouse/backups/2") + client.touch("/clickhouse/backups/2/stage") + client.touch("/clickhouse/backups/2/stage/dead123") + + response = client.delete_stale_backups() + + assert response == ( + 'Found backup "/clickhouse/backups/1", checking if it\'s active\n' + 'Backup "/clickhouse/backups/1" is active, not going to delete\n' + 'Found backup "/clickhouse/backups/2", checking if it\'s active\n' + 'Backup "/clickhouse/backups/2" is not active, deleting it' + ) + + assert client.ls("/clickhouse/backups") == ["1"] + + +def test_base_commands(client: KeeperClient): + client.create("/test_create_zk_node1", "testvalue1") + client.create("/test_create_zk_node_2", "testvalue2") + assert client.get("/test_create_zk_node1") == "testvalue1" + + client.create("/123", "1=2") + client.create("/123/321", "'foo;bar'") + assert client.get("/123") == "1=2" + assert client.get("/123/321") == "foo;bar" + + +def test_four_letter_word_commands(client: KeeperClient): + assert client.execute_query("ruok") == "imok" diff --git a/tests/integration/test_keeper_disks/configs/enable_keeper_snapshot.xml b/tests/integration/test_keeper_disks/configs/enable_keeper_snapshot.xml new file mode 100644 index 00000000000..7bda2af9a50 --- /dev/null +++ b/tests/integration/test_keeper_disks/configs/enable_keeper_snapshot.xml @@ -0,0 +1,57 @@ + + + + + hdfs + hdfs://hdfs1:9000/ + + + local + /var/lib/clickhouse/coordination/logs/ + + + s3_plain + http://minio1:9001/root/logs/ + minio + minio123 + + + local + /var/lib/clickhouse/coordination/snapshots/ + + + s3_plain + http://minio1:9001/root/snapshots/ + minio + minio123 + + + + + + false + 9181 + 1 + false + + + 5000 + 10000 + trace + 10000 + 10 + 1 + 3 + + + + + + + 1 + node + 9234 + + + + \ No newline at end of file diff --git a/tests/integration/test_keeper_disks/test.py b/tests/integration/test_keeper_disks/test.py index 86682bcde01..e41837b89b4 100644 --- a/tests/integration/test_keeper_disks/test.py +++ b/tests/integration/test_keeper_disks/test.py @@ -8,14 +8,22 @@ import os CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", +node_logs = cluster.add_instance( + "node_logs", main_configs=["configs/enable_keeper.xml"], stay_alive=True, with_minio=True, with_hdfs=True, ) +node_snapshot = cluster.add_instance( + "node_snapshot", + main_configs=["configs/enable_keeper_snapshot.xml"], + stay_alive=True, + with_minio=True, + with_hdfs=True, +) + from kazoo.client import KazooClient, KazooState @@ -46,7 +54,7 @@ def stop_zk(zk): pass -def 
stop_clickhouse(cluster, cleanup_disks): +def stop_clickhouse(cluster, node, cleanup_disks): node.stop_clickhouse() if not cleanup_disks: @@ -72,8 +80,8 @@ def stop_clickhouse(cluster, cleanup_disks): ) -def setup_storage(cluster, storage_config, cleanup_disks): - stop_clickhouse(cluster, cleanup_disks) +def setup_storage(cluster, node, storage_config, cleanup_disks): + stop_clickhouse(cluster, node, cleanup_disks) node.copy_file_to_container( os.path.join(CURRENT_TEST_DIR, "configs/enable_keeper.xml"), "/etc/clickhouse-server/config.d/enable_keeper.xml", @@ -87,9 +95,10 @@ def setup_storage(cluster, storage_config, cleanup_disks): keeper_utils.wait_until_connected(cluster, node) -def setup_local_storage(cluster): +def setup_local_storage(cluster, node): setup_storage( cluster, + node, "log_local<\\/log_storage_disk>" "snapshot_local<\\/snapshot_storage_disk>", cleanup_disks=True, @@ -107,30 +116,30 @@ def list_s3_objects(cluster, prefix=""): ] -def get_local_files(path): +def get_local_files(path, node): files = node.exec_in_container(["ls", path]).strip().split("\n") files.sort() return files -def get_local_logs(): - return get_local_files("/var/lib/clickhouse/coordination/logs") +def get_local_logs(node): + return get_local_files("/var/lib/clickhouse/coordination/logs", node) -def get_local_snapshots(): - return get_local_files("/var/lib/clickhouse/coordination/snapshots") +def get_local_snapshots(node): + return get_local_files("/var/lib/clickhouse/coordination/snapshots", node) def test_supported_disk_types(started_cluster): - node.stop_clickhouse() - node.start_clickhouse() - node.contains_in_log("Disk type 'hdfs' is not supported for Keeper") + node_logs.stop_clickhouse() + node_logs.start_clickhouse() + node_logs.contains_in_log("Disk type 'hdfs' is not supported for Keeper") def test_logs_with_disks(started_cluster): - setup_local_storage(started_cluster) + setup_local_storage(started_cluster, node_logs) - node_zk = get_fake_zk("node") + node_zk = get_fake_zk("node_logs") try: node_zk.create("/test") for _ in range(30): @@ -138,10 +147,11 @@ def test_logs_with_disks(started_cluster): stop_zk(node_zk) - previous_log_files = get_local_logs() + previous_log_files = get_local_logs(node_logs) setup_storage( started_cluster, + node_logs, "log_s3_plain<\\/log_storage_disk>" "log_local<\\/latest_log_storage_disk>" "snapshot_local<\\/snapshot_storage_disk>", @@ -151,13 +161,13 @@ def test_logs_with_disks(started_cluster): # all but the latest log should be on S3 s3_log_files = list_s3_objects(started_cluster, "logs/") assert set(s3_log_files) == set(previous_log_files[:-1]) - local_log_files = get_local_logs() + local_log_files = get_local_logs(node_logs) assert len(local_log_files) == 1 assert local_log_files[0] == previous_log_files[-1] previous_log_files = s3_log_files + local_log_files - node_zk = get_fake_zk("node") + node_zk = get_fake_zk("node_logs") for _ in range(30): node_zk.create("/test/somenode", b"somedata", sequence=True) @@ -165,7 +175,7 @@ def test_logs_with_disks(started_cluster): stop_zk(node_zk) log_files = list_s3_objects(started_cluster, "logs/") - local_log_files = get_local_logs() + local_log_files = get_local_logs(node_logs) assert len(local_log_files) == 1 log_files.extend(local_log_files) @@ -175,16 +185,17 @@ def test_logs_with_disks(started_cluster): setup_storage( started_cluster, + node_logs, "log_s3_plain<\\/old_log_storage_disk>" "log_local<\\/log_storage_disk>" "snapshot_local<\\/snapshot_storage_disk>", cleanup_disks=False, ) - local_log_files = 
get_local_logs() + local_log_files = get_local_logs(node_logs) assert set(local_log_files) == set(previous_log_files) - node_zk = get_fake_zk("node") + node_zk = get_fake_zk("node_logs") for child in node_zk.get_children("/test"): assert node_zk.get(f"/test/{child}")[0] == b"somedata" @@ -194,9 +205,9 @@ def test_logs_with_disks(started_cluster): def test_snapshots_with_disks(started_cluster): - setup_local_storage(started_cluster) + setup_local_storage(started_cluster, node_snapshot) - node_zk = get_fake_zk("node") + node_zk = get_fake_zk("node_snapshot") try: node_zk.create("/test2") for _ in range(30): @@ -204,15 +215,16 @@ def test_snapshots_with_disks(started_cluster): stop_zk(node_zk) - snapshot_idx = keeper_utils.send_4lw_cmd(cluster, node, "csnp") - node.wait_for_log_line( + snapshot_idx = keeper_utils.send_4lw_cmd(cluster, node_snapshot, "csnp") + node_snapshot.wait_for_log_line( f"Created persistent snapshot {snapshot_idx}", look_behind_lines=1000 ) - previous_snapshot_files = get_local_snapshots() + previous_snapshot_files = get_local_snapshots(node_snapshot) setup_storage( started_cluster, + node_snapshot, "snapshot_s3_plain<\\/snapshot_storage_disk>" "snapshot_local<\\/latest_snapshot_storage_disk>" "log_local<\\/log_storage_disk>", @@ -222,26 +234,26 @@ def test_snapshots_with_disks(started_cluster): ## all but the latest log should be on S3 s3_snapshot_files = list_s3_objects(started_cluster, "snapshots/") assert set(s3_snapshot_files) == set(previous_snapshot_files[:-1]) - local_snapshot_files = get_local_snapshots() + local_snapshot_files = get_local_snapshots(node_snapshot) assert len(local_snapshot_files) == 1 assert local_snapshot_files[0] == previous_snapshot_files[-1] previous_snapshot_files = s3_snapshot_files + local_snapshot_files - node_zk = get_fake_zk("node") + node_zk = get_fake_zk("node_snapshot") for _ in range(30): node_zk.create("/test2/somenode", b"somedata", sequence=True) stop_zk(node_zk) - snapshot_idx = keeper_utils.send_4lw_cmd(cluster, node, "csnp") - node.wait_for_log_line( + snapshot_idx = keeper_utils.send_4lw_cmd(cluster, node_snapshot, "csnp") + node_snapshot.wait_for_log_line( f"Created persistent snapshot {snapshot_idx}", look_behind_lines=1000 ) snapshot_files = list_s3_objects(started_cluster, "snapshots/") - local_snapshot_files = get_local_snapshots() + local_snapshot_files = get_local_snapshots(node_snapshot) assert len(local_snapshot_files) == 1 snapshot_files.extend(local_snapshot_files) @@ -250,16 +262,17 @@ def test_snapshots_with_disks(started_cluster): setup_storage( started_cluster, + node_snapshot, "snapshot_s3_plain<\\/old_snapshot_storage_disk>" "snapshot_local<\\/snapshot_storage_disk>" "log_local<\\/log_storage_disk>", cleanup_disks=False, ) - local_snapshot_files = get_local_snapshots() + local_snapshot_files = get_local_snapshots(node_snapshot) assert set(local_snapshot_files) == set(previous_snapshot_files) - node_zk = get_fake_zk("node") + node_zk = get_fake_zk("node_snapshot") for child in node_zk.get_children("/test2"): assert node_zk.get(f"/test2/{child}")[0] == b"somedata" diff --git a/tests/integration/test_keeper_reconfig_add/test.py b/tests/integration/test_keeper_reconfig_add/test.py index 2c2da7403a1..724bfdef492 100644 --- a/tests/integration/test_keeper_reconfig_add/test.py +++ b/tests/integration/test_keeper_reconfig_add/test.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance import 
helpers.keeper_utils as ku import os -from kazoo.client import KazooClient -from kazoo.exceptions import BadArgumentsException +import typing as tp cluster = ClickHouseCluster(__file__) CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") @@ -19,11 +18,7 @@ part_of_cluster = "now this node is the part of cluster" zk1, zk2, zk3 = None, None, None -def get_fake_zk(node): - return ku.get_fake_zk(cluster, node) - - -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def started_cluster(): try: cluster.start() @@ -43,21 +38,28 @@ def started_cluster(): yield cluster finally: + conn: tp.Optional[ku.KeeperClient] for conn in [zk1, zk2, zk3]: - if conn: + if conn is not None: conn.stop() - conn.close() cluster.shutdown() -def test_reconfig_add(started_cluster): +def create_client(node: ClickHouseInstance): + return ku.KeeperClient( + cluster.server_bin_path, cluster.get_instance_ip(node.name), 9181 + ) + + +def test_reconfig_add(): """ Add a node to another node. Then add another node to two. """ + global zk1, zk2, zk3 + zk1 = create_client(node1) - zk1 = get_fake_zk(node1) - config = ku.get_config_str(zk1) + config = zk1.get("/keeper/config") print("Initial config", config) assert len(config.split("\n")) == 1 @@ -65,24 +67,20 @@ def test_reconfig_add(started_cluster): assert "node2" not in config assert "node3" not in config - with pytest.raises(BadArgumentsException): + with pytest.raises(ku.KeeperException): # duplicate id with different endpoint zk1.reconfig(joining="server.1=localhost:1337", leaving=None, new_members=None) - with pytest.raises(BadArgumentsException): + with pytest.raises(ku.KeeperException): # duplicate endpoint zk1.reconfig(joining="server.8=node1:9234", leaving=None, new_members=None) for i in range(100): - zk1.create(f"/test_three_{i}", b"somedata") + zk1.create(f"/test_three_{i}", "somedata") node2.start_clickhouse() - config, _ = zk1.reconfig( - joining="server.2=node2:9234", leaving=None, new_members=None - ) + config = zk1.reconfig(joining="server.2=node2:9234", leaving=None, new_members=None) ku.wait_until_connected(cluster, node2) - - config = config.decode("utf-8") print("After adding 2", config) assert len(config.split("\n")) == 2 @@ -90,12 +88,12 @@ def test_reconfig_add(started_cluster): assert "node2" in config assert "node3" not in config - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) ku.wait_configs_equal(config, zk2) for i in range(100): - assert zk2.exists(f"/test_three_{i}") is not None - zk2.create(f"/test_three_{100 + i}", b"somedata") + assert zk2.exists(f"/test_three_{i}") + zk2.create(f"/test_three_{100 + i}", "somedata") # Why not both? 
# One node will process add_srv request, other will pull out updated config, apply @@ -107,23 +105,19 @@ def test_reconfig_add(started_cluster): assert node2.contains_in_log(part_of_cluster) zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) + zk1 = create_client(node1) zk1.sync("/test_three_0") for i in range(200): - assert zk1.exists(f"/test_three_{i}") is not None + assert zk1.exists(f"/test_three_{i}") for i in range(100): - zk2.create(f"/test_four_{i}", b"somedata") + zk2.create(f"/test_four_{i}", "somedata") node3.start_clickhouse() - config, _ = zk2.reconfig( - joining="server.3=node3:9234", leaving=None, new_members=None - ) + config = zk2.reconfig(joining="server.3=node3:9234", leaving=None, new_members=None) ku.wait_until_connected(cluster, node3) - config = config.decode("utf-8") print("After adding 3", config) assert len(config.split("\n")) == 3 @@ -131,25 +125,23 @@ def test_reconfig_add(started_cluster): assert "node2" in config assert "node3" in config - zk3 = get_fake_zk(node3) + zk3 = create_client(node3) ku.wait_configs_equal(config, zk3) for i in range(100): - assert zk3.exists(f"/test_four_{i}") is not None - zk3.create(f"/test_four_{100 + i}", b"somedata") + assert zk3.exists(f"/test_four_{i}") + zk3.create(f"/test_four_{100 + i}", "somedata") zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) + zk1 = create_client(node1) zk1.sync("/test_four_0") zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) zk2.sync("/test_four_0") for i in range(200): - assert zk1.exists(f"/test_four_{i}") is not None - assert zk2.exists(f"/test_four_{i}") is not None + assert zk1.exists(f"/test_four_{i}") + assert zk2.exists(f"/test_four_{i}") assert node3.contains_in_log(part_of_cluster) diff --git a/tests/integration/test_keeper_reconfig_remove/test.py b/tests/integration/test_keeper_reconfig_remove/test.py index fb0a9472df3..d23d771edba 100644 --- a/tests/integration/test_keeper_reconfig_remove/test.py +++ b/tests/integration/test_keeper_reconfig_remove/test.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 +import subprocess import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance import helpers.keeper_utils as ku import os -from kazoo.client import KazooClient -from kazoo.exceptions import BadVersionException, BadArgumentsException +import typing as tp cluster = ClickHouseCluster(__file__) CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") @@ -23,16 +23,18 @@ def started_cluster(): cluster.start() yield cluster finally: + conn: tp.Optional[ku.KeeperClient] for conn in [zk1, zk2, zk3]: if conn: conn.stop() - conn.close() cluster.shutdown() -def get_fake_zk(node): - return ku.get_fake_zk(cluster, node) +def create_client(node: ClickHouseInstance): + return ku.KeeperClient( + cluster.server_bin_path, cluster.get_instance_ip(node.name), 9181 + ) def test_reconfig_remove_followers_from_3(started_cluster): @@ -42,9 +44,9 @@ def test_reconfig_remove_followers_from_3(started_cluster): Check that remaining node is in standalone mode. 
""" - zk1 = get_fake_zk(node1) - config, _ = zk1.get("/keeper/config") - config = config.decode("utf-8") + global zk1, zk2, zk3 + zk1 = create_client(node1) + config = zk1.get("/keeper/config") print("Initial config", config) assert len(config.split("\n")) == 3 @@ -52,36 +54,33 @@ def test_reconfig_remove_followers_from_3(started_cluster): assert "node2" in config assert "node3" in config - with pytest.raises(BadVersionException): - zk1.reconfig(joining=None, leaving="1", new_members=None, from_config=20) - with pytest.raises(BadArgumentsException): + with pytest.raises(ValueError): zk1.reconfig(joining=None, leaving=None, new_members=None) - with pytest.raises(BadArgumentsException): + with pytest.raises(ku.KeeperException): # bulk reconfiguration is not supported zk1.reconfig(joining=None, leaving=None, new_members="3") - with pytest.raises(BadArgumentsException): + with pytest.raises(ValueError): zk1.reconfig(joining="1", leaving="1", new_members="3") - with pytest.raises(BadArgumentsException): + with pytest.raises(ku.KeeperException): # at least one node must be left zk1.reconfig(joining=None, leaving="1,2,3", new_members=None) for i in range(100): - zk1.create(f"/test_two_{i}", b"somedata") + zk1.create(f"/test_two_{i}", "somedata") - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) zk2.sync("/test_two_0") ku.wait_configs_equal(config, zk2) - zk3 = get_fake_zk(node3) + zk3 = create_client(node3) zk3.sync("/test_two_0") ku.wait_configs_equal(config, zk3) for i in range(100): - assert zk2.exists(f"test_two_{i}") is not None - assert zk3.exists(f"test_two_{i}") is not None + assert zk2.exists(f"test_two_{i}") + assert zk3.exists(f"test_two_{i}") - config, _ = zk1.reconfig(joining=None, leaving="3", new_members=None) - config = config.decode("utf-8") + config = zk1.reconfig(joining=None, leaving="3", new_members=None) print("After removing 3", config) assert len(config.split("\n")) == 2 @@ -90,35 +89,26 @@ def test_reconfig_remove_followers_from_3(started_cluster): assert "node3" not in config zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) ku.wait_configs_equal(config, zk2) for i in range(100): - assert zk2.exists(f"test_two_{i}") is not None - zk2.create(f"/test_two_{100 + i}", b"otherdata") + assert zk2.exists(f"test_two_{i}") + zk2.create(f"/test_two_{100 + i}", "otherdata") zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) + zk1 = create_client(node1) zk1.sync("/test_two_0") for i in range(200): - assert zk1.exists(f"test_two_{i}") is not None - - with pytest.raises(Exception): - zk3.stop() - zk3.close() - zk3 = get_fake_zk(node3) - zk3.sync("/test_two_0") + assert zk1.exists(f"test_two_{i}") assert node3.contains_in_log(log_msg_removed) for i in range(100): - zk2.create(f"/test_two_{200 + i}", b"otherdata") + zk2.create(f"/test_two_{200 + i}", "otherdata") - config, _ = zk1.reconfig(joining=None, leaving="2", new_members=None) - config = config.decode("utf-8") + config = zk1.reconfig(joining=None, leaving="2", new_members=None) print("After removing 2", config) assert len(config.split("\n")) == 1 @@ -127,19 +117,12 @@ def test_reconfig_remove_followers_from_3(started_cluster): assert "node3" not in config zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) + zk1 = create_client(node1) zk1.sync("/test_two_0") for i in range(300): - assert zk1.exists(f"test_two_{i}") is not None - - with pytest.raises(Exception): - zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) - zk2.sync("/test_two_0") + assert zk1.exists(f"test_two_{i}") assert not 
node1.contains_in_log(log_msg_removed) assert node2.contains_in_log(log_msg_removed) - assert "Mode: standalone" in zk1.command(b"stat") + assert "Mode: standalone" in zk1.execute_query("stat") diff --git a/tests/integration/test_keeper_reconfig_remove_many/test.py b/tests/integration/test_keeper_reconfig_remove_many/test.py index ec0d8b95eff..0f9d2fce374 100644 --- a/tests/integration/test_keeper_reconfig_remove_many/test.py +++ b/tests/integration/test_keeper_reconfig_remove_many/test.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance import helpers.keeper_utils as ku import os -from kazoo.client import KazooClient, KazooState -from kazoo.exceptions import BadVersionException, BadArgumentsException +import typing as tp + cluster = ClickHouseCluster(__file__) CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") @@ -26,49 +26,51 @@ def started_cluster(): cluster.start() yield cluster finally: + conn: tp.Optional[ku.KeeperClient] for conn in [zk1, zk2, zk3, zk4, zk5]: if conn: conn.stop() - conn.close() cluster.shutdown() -def get_fake_zk(node): - return ku.get_fake_zk(cluster, node) +def create_client(node: ClickHouseInstance): + return ku.KeeperClient( + cluster.server_bin_path, cluster.get_instance_ip(node.name), 9181 + ) def test_reconfig_remove_2_and_leader(started_cluster): """ Remove 2 followers from a cluster of 5. Remove leader from 3 nodes. """ + global zk1, zk2, zk3, zk4, zk5 - zk1 = get_fake_zk(node1) + zk1 = create_client(node1) config = ku.get_config_str(zk1) print("Initial config", config) assert len(config.split("\n")) == 5 for i in range(100): - zk1.create(f"/test_two_{i}", b"somedata") + zk1.create(f"/test_two_{i}", "somedata") - zk4 = get_fake_zk(node4) + zk4 = create_client(node4) zk4.sync("/test_two_0") ku.wait_configs_equal(config, zk4) - zk5 = get_fake_zk(node5) + zk5 = create_client(node5) zk5.sync("/test_two_0") ku.wait_configs_equal(config, zk5) for i in range(100): - assert zk4.exists(f"test_two_{i}") is not None - assert zk5.exists(f"test_two_{i}") is not None + assert zk4.exists(f"test_two_{i}") + assert zk5.exists(f"test_two_{i}") - zk4.create(f"/test_two_{100 + i}", b"otherdata") + zk4.create(f"/test_two_{100 + i}", "otherdata") - zk2 = get_fake_zk(node2) - config, _ = zk2.reconfig(joining=None, leaving="4,5", new_members=None) - config = config.decode("utf-8") + zk2 = create_client(node2) + config = zk2.reconfig(joining=None, leaving="4,5", new_members=None) print("After removing 4,5", config) assert len(config.split("\n")) == 3 @@ -79,27 +81,14 @@ def test_reconfig_remove_2_and_leader(started_cluster): assert "node5" not in config zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) + zk1 = create_client(node1) zk1.sync("/test_two_0") ku.wait_configs_equal(config, zk1) for i in range(200): - assert zk1.exists(f"test_two_{i}") is not None - assert zk2.exists(f"test_two_{i}") is not None - - with pytest.raises(Exception): - zk4.stop() - zk4.close() - zk4 = get_fake_zk(node4) - zk4.sync("/test_two_0") - - with pytest.raises(Exception): - zk5.stop() - zk5.close() - zk5 = get_fake_zk(node5) - zk5.sync("/test_two_0") + assert zk1.exists(f"test_two_{i}") + assert zk2.exists(f"test_two_{i}") assert not node1.contains_in_log(log_msg_removed) assert not node2.contains_in_log(log_msg_removed) @@ -110,11 +99,10 @@ def test_reconfig_remove_2_and_leader(started_cluster): assert ku.is_leader(cluster, node1) for i in 
range(100): - zk1.create(f"/test_leader_{i}", b"somedata") + zk1.create(f"/test_leader_{i}", "somedata") # when a leader gets a remove request, it must yield leadership - config, _ = zk1.reconfig(joining=None, leaving="1", new_members=None) - config = config.decode("utf-8") + config = zk1.reconfig(joining=None, leaving="1", new_members=None) print("After removing 1 (leader)", config) assert len(config.split("\n")) == 2 @@ -125,24 +113,17 @@ def test_reconfig_remove_2_and_leader(started_cluster): assert "node5" not in config zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) zk2.sync("/test_leader_0") ku.wait_configs_equal(config, zk2) - zk3 = get_fake_zk(node3) + zk3 = create_client(node3) zk3.sync("/test_leader_0") ku.wait_configs_equal(config, zk3) for i in range(100): - assert zk2.exists(f"test_leader_{i}") is not None - assert zk3.exists(f"test_leader_{i}") is not None - - with pytest.raises(Exception): - zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) - zk1.sync("/test_leader_0") + assert zk2.exists(f"test_leader_{i}") + assert zk3.exists(f"test_leader_{i}") assert node1.contains_in_log(log_msg_removed) assert not node2.contains_in_log(log_msg_removed) diff --git a/tests/integration/test_keeper_reconfig_replace_leader/test.py b/tests/integration/test_keeper_reconfig_replace_leader/test.py index ca1ec3a0c92..76a8eb092e2 100644 --- a/tests/integration/test_keeper_reconfig_replace_leader/test.py +++ b/tests/integration/test_keeper_reconfig_replace_leader/test.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance from os.path import join, dirname, realpath -import time import helpers.keeper_utils as ku -from kazoo.client import KazooClient, KazooState +import typing as tp cluster = ClickHouseCluster(__file__) CONFIG_DIR = join(dirname(realpath(__file__)), "configs") @@ -31,24 +30,26 @@ def started_cluster(): yield cluster finally: + conn: tp.Optional[ku.KeeperClient] for conn in [zk1, zk2, zk3, zk4]: if conn: conn.stop() - conn.close() cluster.shutdown() -def get_fake_zk(node): - return ku.get_fake_zk(cluster, node) +def create_client(node: ClickHouseInstance): + return ku.KeeperClient( + cluster.server_bin_path, cluster.get_instance_ip(node.name), 9181 + ) def test_reconfig_replace_leader(started_cluster): """ Remove leader from a cluster of 3 and add a new node via two commands. 
""" - - zk1 = get_fake_zk(node1) + global zk1, zk2, zk3, zk4 + zk1 = create_client(node1) config = ku.get_config_str(zk1) assert len(config.split("\n")) == 3 @@ -58,23 +59,22 @@ def test_reconfig_replace_leader(started_cluster): assert "node4" not in config for i in range(100): - zk1.create(f"/test_four_{i}", b"somedata") + zk1.create(f"/test_four_{i}", "somedata") - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) zk2.sync("/test_four_0") ku.wait_configs_equal(config, zk2) - zk3 = get_fake_zk(node3) + zk3 = create_client(node3) zk3.sync("/test_four_0") ku.wait_configs_equal(config, zk3) for i in range(100): - assert zk2.exists(f"/test_four_{i}") is not None - assert zk3.exists(f"/test_four_{i}") is not None + assert zk2.exists(f"/test_four_{i}") + assert zk3.exists(f"/test_four_{i}") assert ku.is_leader(cluster, node1) - config, _ = zk2.reconfig(joining=None, leaving="1", new_members=None) - config = config.decode("utf-8") + config = zk2.reconfig(joining=None, leaving="1", new_members=None) print("After removing 1 (leader)", config) assert len(config.split("\n")) == 2 @@ -83,17 +83,10 @@ def test_reconfig_replace_leader(started_cluster): assert "node3" in config assert "node4" not in config - with pytest.raises(Exception): - zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) - zk1.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) node4.start_clickhouse() - config, _ = zk2.reconfig( - joining="server.4=node4:9234", leaving=None, new_members=None - ) - config = config.decode("utf-8") + config = zk2.reconfig(joining="server.4=node4:9234", leaving=None, new_members=None) ku.wait_until_connected(cluster, node4) print("After adding 4", config) @@ -103,22 +96,20 @@ def test_reconfig_replace_leader(started_cluster): assert "node3" in config assert "node4" in config - zk4 = get_fake_zk(node4) + zk4 = create_client(node4) ku.wait_configs_equal(config, zk4) for i in range(100): - assert zk4.exists(f"test_four_{i}") is not None - zk4.create(f"/test_four_{100 + i}", b"somedata") + assert zk4.exists(f"test_four_{i}") + zk4.create(f"/test_four_{100 + i}", "somedata") zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) + zk2 = create_client(node2) zk2.sync("/test_four_0") ku.wait_configs_equal(config, zk2) zk3.stop() - zk3.close() - zk3 = get_fake_zk(node3) + zk3 = create_client(node3) zk3.sync("/test_four_0") ku.wait_configs_equal(config, zk3) diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml deleted file mode 100644 index 71f3403aca3..00000000000 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - true - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml deleted file mode 100644 index faefb4d1102..00000000000 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - true - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true 
- - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml deleted file mode 100644 index 80a9caa92c2..00000000000 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - true - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml deleted file mode 100644 index 9fd88fe5d63..00000000000 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - 9181 - 4 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - true - - - 5000 - 10000 - trace - - - - 2 node2 9234 - 3 node3 9234 - 4 node4 9234 - - - diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py deleted file mode 100644 index e23d0674c12..00000000000 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python3 - -import pytest -from helpers.cluster import ClickHouseCluster -from os.path import join, dirname, realpath -import time -import helpers.keeper_utils as ku -from kazoo.client import KazooClient, KazooState - -cluster = ClickHouseCluster(__file__) -CONFIG_DIR = join(dirname(realpath(__file__)), "configs") - -node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) -node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) -node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) -node4 = cluster.add_instance("node4", stay_alive=True) -zk1, zk2, zk3, zk4 = None, None, None, None - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - node4.stop_clickhouse() - node4.copy_file_to_container( - join(CONFIG_DIR, "keeper4.xml"), - "/etc/clickhouse-server/config.d/keeper.xml", - ) - - yield cluster - - finally: - for conn in [zk1, zk2, zk3, zk4]: - if conn: - conn.stop() - conn.close() - - cluster.shutdown() - - -def get_fake_zk(node): - return ku.get_fake_zk(cluster, node) - - -def test_reconfig_replace_leader_in_one_command(started_cluster): - """ - Remove leader from a cluster of 3 and add a new node to this cluster in a single command - """ - - zk1 = get_fake_zk(node1) - config = ku.get_config_str(zk1) - - assert len(config.split("\n")) == 3 - assert "node1" in config - assert "node2" in config - assert "node3" in config - assert "node4" not in config - - for i in range(100): - zk1.create(f"/test_four_{i}", b"somedata") - - zk2 = get_fake_zk(node2) - zk2.sync("/test_four_0") - ku.wait_configs_equal(config, zk2) - - zk3 = get_fake_zk(node3) - zk3.sync("/test_four_0") - ku.wait_configs_equal(config, zk3) - - for i in range(100): - assert zk2.exists(f"/test_four_{i}") is not None - assert zk3.exists(f"/test_four_{i}") is not None - - assert ku.is_leader(cluster, node1) - node4.start_clickhouse() - config, _ = zk2.reconfig( - joining="server.4=node4:9234", leaving="1", 
new_members=None - ) - config = config.decode("utf-8") - - print("After removing 1 and adding 4", config) - assert len(config.split("\n")) == 3 - assert "node1" not in config - assert "node2" in config - assert "node3" in config - assert "node4" in config - - ku.wait_until_connected(cluster, node4) - time.sleep(1) - - zk4 = get_fake_zk(node4) - zk4.sync("/test_four_0") - ku.wait_configs_equal(config, zk4) - - for i in range(100): - assert zk4.exists(f"test_four_{i}") is not None - zk4.create(f"/test_four_{100 + i}", b"somedata") - - with pytest.raises(Exception): - zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) - zk1.sync("/test_four_0") - - zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) - zk2.sync("/test_four_0") - ku.wait_configs_equal(config, zk2) - - zk3.stop() - zk3.close() - zk3 = get_fake_zk(node3) - zk3.sync("/test_four_0") - ku.wait_configs_equal(config, zk3) - - for i in range(200): - assert zk2.exists(f"test_four_{i}") is not None - assert zk3.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index c97c3e5e2a8..18cfdfa5070 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -13,25 +13,36 @@ from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry -def check_query(clickhouse_node, query, result_set, retry_count=10, interval_seconds=3): - lastest_result = "" +def check_query( + clickhouse_node, + query, + result_set, + retry_count=30, + interval_seconds=1, + on_failure=None, +): + latest_result = "" + if "/* expect: " not in query: + query = "/* expect: " + result_set.rstrip("\n") + "*/ " + query for i in range(retry_count): try: - lastest_result = clickhouse_node.query(query) - if result_set == lastest_result: + latest_result = clickhouse_node.query(query) + if result_set == latest_result: return - logging.debug(f"latest_result {lastest_result}") + logging.debug(f"latest_result {latest_result}") time.sleep(interval_seconds) except Exception as e: logging.debug(f"check_query retry {i+1} exception {e}") time.sleep(interval_seconds) else: - result_got = clickhouse_node.query(query) + latest_result = clickhouse_node.query(query) + if on_failure is not None and latest_result != result_set: + on_failure(latest_result, result_set) assert ( - result_got == result_set - ), f"Got result {result_got}, while expected result {result_set}" + latest_result == result_set + ), f"Got result '{latest_result}', expected result '{result_set}'" def dml_with_materialized_mysql_database(clickhouse_node, mysql_node, service_name): @@ -1238,7 +1249,7 @@ def err_sync_user_privs_with_materialized_mysql_database( ) assert "priv_err_db" in clickhouse_node.query("SHOW DATABASES") assert "test_table_1" not in clickhouse_node.query("SHOW TABLES FROM priv_err_db") - clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db") + clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db SYNC") mysql_node.query("REVOKE SELECT ON priv_err_db.* FROM 'test'@'%'") time.sleep(3) @@ -1431,7 +1442,7 @@ def mysql_kill_sync_thread_restore_test(clickhouse_node, mysql_node, service_nam time.sleep(sleep_time) clickhouse_node.query("SELECT * FROM test_database.test_table") - clickhouse_node.query_with_retry("DETACH DATABASE test_database") + clickhouse_node.query_with_retry("DETACH DATABASE test_database 
SYNC") clickhouse_node.query("ATTACH DATABASE test_database") check_query( clickhouse_node, @@ -1495,7 +1506,7 @@ def mysql_killed_while_insert(clickhouse_node, mysql_node, service_name): mysql_node.alloc_connection() - clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert") + clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert SYNC") clickhouse_node.query("ATTACH DATABASE kill_mysql_while_insert") result = mysql_node.query_and_get_data( @@ -1581,6 +1592,134 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE utf8mb4_test") +def utf8mb4_column_test(clickhouse_node, mysql_node, service_name): + db = "utf8mb4_column_test" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + + # Full sync + mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)") + mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + + # Full sync replicated unquoted columns names since they use SHOW CREATE TABLE + # which returns quoted column names + check_query( + clickhouse_node, + f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}", + "quoted\nunquoted\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted", + "1\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)" + ) + mysql_node.query( + f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)" + ) + mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())") + mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())") + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new", + "1\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + +def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): + db = "您Hi您" + table = "日期" + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"CREATE DATABASE `{db}`") + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}`(id, `{table}`) VALUES(2, now())") + mysql_node.query( + f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted 
VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted(id, {table}) VALUES(2, now())") + clickhouse_node.query( + f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}`", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", + "2\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}2`(id, `{table}`) VALUES(2, now())") + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2`", + "2\n", + ) + + mysql_node.query( + f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())") + mysql_node.query( + f"INSERT INTO {db}.{table}2_unquoted(id, {table}) VALUES(2, now())" + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", + "2\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + def system_parts_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS system_parts_test") clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test") @@ -1701,6 +1840,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_ mysql_node.query("DROP DATABASE materialized_with_column_comments_test") +def double_quoted_comment(clickhouse_node, mysql_node, service_name): + db = "comment_db" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + mysql_node.query( + f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + + # incremental + mysql_node.query( + f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + check_query( + clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n" + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") @@ -2418,6 +2592,20 @@ def named_collections(clickhouse_node, mysql_node, service_name): f"/* expect: (1, 'a', 1), (2, 'b', 2) */ SELECT * FROM {db}.t1", "1\ta\t1\n2\tb\t2\n", ) + clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=9999") + clickhouse_node.query_with_retry(f"DETACH DATABASE {db} SYNC") + mysql_node.query(f"INSERT INTO {db}.t1 VALUES (3, 'c', 3)") + assert 
"ConnectionFailed:" in clickhouse_node.query_and_get_error( + f"ATTACH DATABASE {db}" + ) + clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=3306") + clickhouse_node.query(f"ATTACH DATABASE {db}") + check_query( + clickhouse_node, + f"/* expect: (1, 'a', 1), (2, 'b', 2), (3, 'c', 3) */ SELECT * FROM {db}.t1", + "1\ta\t1\n2\tb\t2\n3\tc\t3\n", + ) + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") @@ -2449,3 +2637,37 @@ def create_table_as_select(clickhouse_node, mysql_node, service_name): clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def table_with_indexes(clickhouse_node, mysql_node, service_name): + db = "table_with_indexes" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + + mysql_node.query( + f"CREATE TABLE {db}.t1(id INT NOT NULL PRIMARY KEY," + f"data varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL) ENGINE = InnoDB" + ) + + mysql_node.query(f"INSERT INTO {db}.t1 VALUES(1, 'some test string 1')") + mysql_node.query(f"INSERT INTO {db}.t1 VALUES(2, 'some test string 2')") + + clickhouse_node.query( + f""" + CREATE DATABASE {db} ENGINE = MaterializeMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse') + TABLE OVERRIDE t1 (COLUMNS ( + INDEX data_idx data TYPE ngrambf_v1(5, 65536, 4, 0) GRANULARITY 1 + )) + """ + ) + + check_query( + clickhouse_node, + "SELECT data_uncompressed_bytes FROM system.data_skipping_indices WHERE " + "database = 'table_with_indexes' and table = 't1' and name = 'data_idx'", + "65536\n", + ) + + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 32c1da8a2bd..64c4fb45e9b 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -1,17 +1,11 @@ -import os -import os.path as p import time -import pwd -import re import pymysql.cursors import pytest from helpers.cluster import ( ClickHouseCluster, ClickHouseInstance, get_docker_compose_path, - run_and_check, ) -import docker import logging from . import materialized_with_ddl @@ -52,6 +46,7 @@ def started_cluster(): cluster.start() yield cluster finally: + node_db.stop_clickhouse() # ensures that coverage report is written to disk, even if cluster.shutdown() times out. 
cluster.shutdown() @@ -62,8 +57,6 @@ class MySQLConnection: user="root", password="clickhouse", ip_address=None, - docker_compose=None, - project_name=cluster.project_name, ): self.user = user self.port = port @@ -86,7 +79,7 @@ class MySQLConnection: else: self.mysql_connection.ping(reconnect=True) logging.debug( - "MySQL Connection establised: {}:{}".format( + "MySQL Connection established: {}:{}".format( self.ip_address, self.port ) ) @@ -94,7 +87,7 @@ class MySQLConnection: except Exception as e: errors += [str(e)] time.sleep(1) - raise Exception("Connection not establised, {}".format(errors)) + raise Exception("Connection not established, {}".format(errors)) def query(self, execution_query): with self.alloc_connection().cursor() as cursor: @@ -118,9 +111,9 @@ class MySQLConnection: if result is not None: print(cursor.fetchall()) - def query_and_get_data(self, executio_query): + def query_and_get_data(self, execution_query): with self.alloc_connection().cursor() as cursor: - cursor.execute(executio_query) + cursor.execute(execution_query) return cursor.fetchall() def close(self): @@ -381,6 +374,12 @@ def test_utf8mb4( ): materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57") materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80") + materialized_with_ddl.utf8mb4_column_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + materialized_with_ddl.utf8mb4_name_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node): @@ -422,6 +421,12 @@ def test_materialized_with_column_comments( ) +def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.double_quoted_comment( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + + def test_materialized_with_enum( started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node ): @@ -541,3 +546,9 @@ def test_create_table_as_select(started_cluster, started_mysql_8_0, clickhouse_n materialized_with_ddl.create_table_as_select( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_table_with_indexes(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.table_with_indexes( + clickhouse_node, started_mysql_8_0, "mysql80" + ) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml similarity index 50% rename from tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml rename to tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml index 7a00648b28e..56fc5ed34ca 100644 --- a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml @@ -1,7 +1,7 @@ - 1 + 1 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml new file mode 100644 
index 00000000000..5b3e17d145f --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml @@ -0,0 +1,5 @@ + + 1 + 4096 + 8192 + diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py new file mode 100644 index 00000000000..df9439bc2bb --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -0,0 +1,40 @@ +from helpers.cluster import ClickHouseCluster +import pytest + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/memory_profiler.xml"], + user_configs=["configs/max_untracked_memory.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_trace_boundaries_work(started_cluster): + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") + node.query("SYSTEM FLUSH LOGS") + + assert ( + node.query( + "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" + ) + == "1\n" + ) + assert ( + node.query( + "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" + ) + == "0\n" + ) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml index 07f57c08e32..b9df1672400 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml @@ -10,6 +10,8 @@ devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== 100000 + 10 + 10 + false + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.crt b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt new file mode 100755 index 00000000000..070d37f3b77 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJANhP897Se2gmMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0yMDA0MTgyMTE2NDBaFw0yMTA0MTgyMTE2NDBaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAM92kcojQoMsjZ9YGhPMY6h/fDUsZeSKHLxgqE6wbmfU1oZKCPWqnvl+4n0J +pnT5h1ETxxYZLepimKq0DEVPUTmCl0xmcKbtUNiaTUKYKsdita6b2vZCX9wUPN9p +2Kjnm41l+aZNqIEBhIgHNWg9qowi20y0EIXR79jQLwwaInHAaJLZxVsqY2zjQ/D7 +1Zh82MXud7iqxBQiEfw9Cz35UFA239R8QTlPkVQfsN1gfLxnLk24QUX3o+hbUI1g +nlSpyYDHYQlOmwz8doDs6THHAZNJ4bPE9xHNFpw6dGZdbtH+IKQ/qRZIiOaiNuzJ +IOHl6XQDRDkW2LMTiCQ6fjC7Pz8CAwEAAaNQME4wHQYDVR0OBBYEFFvhaA/Eguyf +BXkMj8BkNLBqMnz2MB8GA1UdIwQYMBaAFFvhaA/EguyfBXkMj8BkNLBqMnz2MAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBACeU/oL48eVAKH7NQntHhRaJ +ZGeQzKIjrSBjFo8BGXD1nJZhUeFsylLrhCkC8/5/3grE3BNVX9bxcGjO81C9Mn4U +t0z13d6ovJjCZSQArtLwgeJGlpH7gNdD3DyT8DQmrqYVnmnB7UmBu45XH1LWGQZr +FAOhGRVs6s6mNj8QlLMgdmsOeOQnsGCMdoss8zV9vO2dc4A5SDSSL2mqGGY4Yjtt +X+XlEhXXnksGyx8NGVOZX4wcj8WeCAj/lihQ7Zh6XYwZH9i+E46ompUwoziZnNPu +2RH63tLNCxkOY2HF5VMlbMmzer3FkhlM6TAZZRPcvSphKPwXK4A33yqc6wnWvpc= +-----END CERTIFICATE----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.key 
b/tests/integration/test_profile_max_sessions_for_user/configs/server.key new file mode 100755 index 00000000000..b3dee82dcda --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDPdpHKI0KDLI2f +WBoTzGOof3w1LGXkihy8YKhOsG5n1NaGSgj1qp75fuJ9CaZ0+YdRE8cWGS3qYpiq +tAxFT1E5gpdMZnCm7VDYmk1CmCrHYrWum9r2Ql/cFDzfadio55uNZfmmTaiBAYSI +BzVoPaqMIttMtBCF0e/Y0C8MGiJxwGiS2cVbKmNs40Pw+9WYfNjF7ne4qsQUIhH8 +PQs9+VBQNt/UfEE5T5FUH7DdYHy8Zy5NuEFF96PoW1CNYJ5UqcmAx2EJTpsM/HaA +7OkxxwGTSeGzxPcRzRacOnRmXW7R/iCkP6kWSIjmojbsySDh5el0A0Q5FtizE4gk +On4wuz8/AgMBAAECggEAJ54J2yL+mZQRe2NUn4FBarTloDXZQ1pIgISov1Ybz0Iq +sTxEF728XAKp95y3J9Fa0NXJB+RJC2BGrRpy2W17IlNY1yMc0hOxg5t7s4LhcG/e +J/jlSG+GZL2MnlFVKXQJFWhq0yIzUmdayqstvLlB7z7cx/n+yb88YRfoVBRNjZEL +Tdrsw+087igDjrIxZJ3eMN5Wi434n9s4yAoRQC1bP5wcWx0gD4MzdmL8ip6suiRc +LRuBAhV/Op812xlxUhrF5dInUM9OLlGTXpUzexAS8Cyy7S4bfkW2BaCxTF7I7TFw +Whx28CKn/G49tIuU0m6AlxWbXpLVePTFyMb7RJz5cQKBgQD7VQd2u3HM6eE3PcXD +p6ObdLTUk8OAJ5BMmADFc71W0Epyo26/e8KXKGYGxE2W3fr13y+9b0fl5fxZPuhS +MgvXEO7rItAVsLcp0IzaqY0WUee2b4XWPAU0XuPqvjYMpx8H5OEHqFK6lhZysAqM +X7Ot3/Hux9X0MC4v5a/HNbDUOQKBgQDTUPaP3ADRrmpmE2sWuzWEnCSEz5f0tCLO +wTqhV/UraWUNlAbgK5NB790IjH/gotBSqqNPLJwJh0LUfClKM4LiaHsEag0OArOF +GhPMK1Ohps8c2RRsiG8+hxX2HEHeAVbkouEDPDiHdIW/92pBViDoETXL6qxDKbm9 +LkOcVeDfNwKBgQChh1xsqrvQ/t+IKWNZA/zahH9TwEP9sW/ESkz0mhYuHWA7nV4o +ItpFW+l2n+Nd+vy32OFN1p9W2iD9GrklWpTRfEiRRqaFyjVt4mMkhaPvnGRXlAVo +Utrldbb1v5ntN9txr2ARE9VXpe53dzzQSxGnxi4vUK/paK3GitAWMCOdwQKBgQCi +hmGsUXQb0P6qVYMGr6PAw2re7t8baLRguoMCdqjs45nCMLh9D2apzvb8TTtJJU/+ +VJlYGqJEPdDrpjcHh8jBo8QBqCM0RGWYGG9jl2syKB6hPGCV/PU6bSE58Y/DVNpk +7NUM7PM5UyhPddY2PC0A78Ole29UFLJzSzLa+b4DTwKBgH9Wh2k4YPnPcRrX89UL +eSwWa1CGq6HWX8Kd5qyz256aeHWuG5nv15+rBt+D7nwajUsqeVkAXz5H/dHuG1xz +jb7RW+pEjx0GVAmIbkM9vOLqEUfHHHPuk4AXCGGZ5sarPiKg4BHKBBsY1dpoO5UH +0j71fRA6zurHnTXDaCLWlUpZ +-----END PRIVATE KEY----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml new file mode 100644 index 00000000000..778d327c460 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml @@ -0,0 +1,17 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + /etc/clickhouse-server/config.d/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/users.xml b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml new file mode 100644 index 00000000000..3bed673b2ca --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml @@ -0,0 +1,16 @@ + + + + 2 + 0 + + + + + + + + 123 + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py new file mode 100755 index 00000000000..28c2597e52d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ 
-0,0 +1,249 @@ +import os + +import grpc +import pymysql.connections +import psycopg2 as py_psql +import pytest +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.test_tools import assert_logs_contain_with_retry + +from helpers.uclient import client, prompt + +MAX_SESSIONS_FOR_USER = 2 +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 + +TEST_USER = "test_user" +TEST_PASSWORD = "123" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/ssl_conf.xml", + "configs/dhparam.pem", + "configs/server.crt", + "configs/server.key", + ], + user_configs=["configs/users.xml"], + env_variables={ + "UBSAN_OPTIONS": "print_stacktrace=1", + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + "TSAN_OPTIONS": "report_atomic_races=0 " + + os.getenv("TSAN_OPTIONS", default=""), + }, +) + + +def get_query(name, id): + return f"SElECT '{name}', {id}, number from system.numbers" + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +def grpc_query(query_text, channel, session_id_): + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query_text, + session_id=session_id_, + user_name=TEST_USER, + password=TEST_PASSWORD, + ) + + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + return result.output.decode(DEFAULT_ENCODING) + + +def threaded_run_test(sessions): + instance.rotate_logs() + thread_list = [] + for i in range(len(sessions)): + thread = ThreadWithException(target=sessions[i], args=(i,)) + thread_list.append(thread) + thread.start() + + if len(sessions) > MAX_SESSIONS_FOR_USER: + # High retry amount to avoid flakiness in ASAN (+Analyzer) tests + assert_logs_contain_with_retry( + instance, "overflown session count", retry_count=60 + ) + + instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") + + for thread in thread_list: + thread.join() + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + # Wait for the PostgreSQL handler to start. + # Cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. 
+ instance.wait_for_log_line("PostgreSQL compatibility protocol") + yield cluster + finally: + cluster.shutdown() + + +class ThreadWithException(threading.Thread): + def run(self): + try: + super().run() + except: + pass + + +def postgres_session(id): + ch = py_psql.connect( + host=instance.ip_address, + port=POSTGRES_SERVER_PORT, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + ) + cur = ch.cursor() + cur.execute(get_query("postgres_session", id)) + cur.fetchall() + + +def mysql_session(id): + client = pymysql.connections.Connection( + host=instance.ip_address, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + cursor.execute(get_query("mysql_session", id)) + cursor.fetchall() + + +def tcp_session(id): + instance.query(get_query("tcp_session", id), user=TEST_USER, password=TEST_PASSWORD) + + +def http_session(id): + instance.http_query( + get_query("http_session", id), user=TEST_USER, password=TEST_PASSWORD + ) + + +def http_named_session(id): + instance.http_query( + get_query("http_named_session", id), + user=TEST_USER, + password=TEST_PASSWORD, + params={"session_id": id}, + ) + + +def grpc_session(id): + grpc_query( + get_query("grpc_session", id), grpc_create_insecure_channel(), f"session_{id}" + ) + + +def test_profile_max_sessions_for_user_tcp(started_cluster): + threaded_run_test([tcp_session] * 3) + + +def test_profile_max_sessions_for_user_postgres(started_cluster): + threaded_run_test([postgres_session] * 3) + + +def test_profile_max_sessions_for_user_mysql(started_cluster): + threaded_run_test([mysql_session] * 3) + + +def test_profile_max_sessions_for_user_http(started_cluster): + threaded_run_test([http_session] * 3) + + +def test_profile_max_sessions_for_user_http_named_session(started_cluster): + threaded_run_test([http_named_session] * 3) + + +def test_profile_max_sessions_for_user_grpc(started_cluster): + threaded_run_test([grpc_session] * 3) + + +def test_profile_max_sessions_for_user_tcp_and_others(started_cluster): + threaded_run_test([tcp_session, grpc_session, grpc_session]) + threaded_run_test([tcp_session, http_session, http_session]) + threaded_run_test([tcp_session, mysql_session, mysql_session]) + threaded_run_test([tcp_session, postgres_session, postgres_session]) + threaded_run_test([tcp_session, http_session, postgres_session]) + threaded_run_test([tcp_session, postgres_session, http_session]) + + +def test_profile_max_sessions_for_user_setting_in_query(started_cluster): + instance.query_and_get_error("SET max_sessions_for_user = 10") + + +def test_profile_max_sessions_for_user_client_suggestions_connection(started_cluster): + command_text = f"{started_cluster.get_client_cmd()} --host {instance.ip_address} --port 9000 -u {TEST_USER} --password {TEST_PASSWORD}" + command_text_without_suggestions = command_text + " --disable_suggestion" + + # Launch client1 without suggestions to avoid a race condition: + # Client1 opens a session. + # Client1 opens a session for suggestion connection. + # Client2 fails to open a session and gets the USER_SESSION_LIMIT_EXCEEDED error. + # + # Expected order: + # Client1 opens a session. + # Client2 opens a session. + # Client2 fails to open a session for suggestions and with USER_SESSION_LIMIT_EXCEEDED (No error printed). + # Client3 fails to open a session. + # Client1 executes the query. + # Client2 loads suggestions from the server using the main connection and executes a query. 
+ with client( + name="client1>", log=None, command=command_text_without_suggestions + ) as client1: + client1.expect(prompt) + with client(name="client2>", log=None, command=command_text) as client2: + client2.expect(prompt) + with client(name="client3>", log=None, command=command_text) as client3: + client3.expect("USER_SESSION_LIMIT_EXCEEDED") + + client1.send("SELECT 'CLIENT_1_SELECT' FORMAT CSV") + client1.expect("CLIENT_1_SELECT") + client1.expect(prompt) + client2.send("SELECT 'CLIENT_2_SELECT' FORMAT CSV") + client2.expect("CLIENT_2_SELECT") + client2.expect(prompt) diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 4dbd530dd17..1276a6079f0 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -147,12 +147,16 @@ def test_drop_replica_and_achieve_quorum(started_cluster): @pytest.mark.parametrize(("add_new_data"), [False, True]) def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): - zero.query( - "DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition ON CLUSTER cluster" + # use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient) + table_name = ( + "test_quorum_insert_with_drop_partition_new_data" + if add_new_data + else "test_quorum_insert_with_drop_partition" ) + zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster") create_query = ( - "CREATE TABLE test_quorum_insert_with_drop_partition ON CLUSTER cluster " + f"CREATE TABLE {table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " @@ -161,78 +165,74 @@ def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): print("Create Replicated table with three replicas") zero.query(create_query) - print("Stop fetches for test_quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM STOP FETCHES test_quorum_insert_with_drop_partition") + print(f"Stop fetches for {table_name} at first replica.") + first.query(f"SYSTEM STOP FETCHES {table_name}") print("Insert with quorum. 
(zero and second)") - zero.query( - "INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')" - ) + zero.query(f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") - zero.query( - "ALTER TABLE test_quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'" - ) + zero.query(f"ALTER TABLE {table_name} DROP PARTITION '2011-01-01'") if add_new_data: print("Insert to deleted partition") - zero.query( - "INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')" - ) + zero.query(f"INSERT INTO {table_name}(a,d) VALUES(2, '2011-01-01')") - print("Resume fetches for test_quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM START FETCHES test_quorum_insert_with_drop_partition") + print(f"Resume fetches for {table_name} at first replica.") + first.query(f"SYSTEM START FETCHES {table_name}") print("Sync first replica with others.") - first.query("SYSTEM SYNC REPLICA test_quorum_insert_with_drop_partition") + first.query(f"SYSTEM SYNC REPLICA {table_name}") assert "20110101" not in first.query( - """ - WITH (SELECT toString(uuid) FROM system.tables WHERE name = 'test_quorum_insert_with_drop_partition') AS uuid, + f""" + WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{table_name}') AS uuid, '/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical """ ) + # Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error + second.query(f"SYSTEM SYNC REPLICA {table_name}") + print("Select from updated partition.") if add_new_data: + assert TSV("2\t2011-01-01\n") == TSV(zero.query(f"SELECT * FROM {table_name}")) assert TSV("2\t2011-01-01\n") == TSV( - zero.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) - assert TSV("2\t2011-01-01\n") == TSV( - second.query("SELECT * FROM test_quorum_insert_with_drop_partition") + second.query(f"SELECT * FROM {table_name}") ) else: - assert TSV("") == TSV( - zero.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) - assert TSV("") == TSV( - second.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) + assert TSV("") == TSV(zero.query(f"SELECT * FROM {table_name}")) + assert TSV("") == TSV(second.query(f"SELECT * FROM {table_name}")) - zero.query( - "DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition ON CLUSTER cluster" - ) + zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster") @pytest.mark.parametrize(("add_new_data"), [False, True]) def test_insert_quorum_with_move_partition(started_cluster, add_new_data): - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source ON CLUSTER cluster" + # use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient) + source_table_name = ( + "test_insert_quorum_with_move_partition_source_new_data" + if add_new_data + else "test_insert_quorum_with_move_partition_source" ) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination ON CLUSTER cluster" + destination_table_name = ( + "test_insert_quorum_with_move_partition_destination_new_data" + if add_new_data + else "test_insert_quorum_with_move_partition_destination" ) + zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster") + zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster") create_source = ( - "CREATE TABLE test_insert_quorum_with_move_partition_source ON CLUSTER 
cluster " + f"CREATE TABLE {source_table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " ) create_destination = ( - "CREATE TABLE test_insert_quorum_with_move_partition_destination ON CLUSTER cluster " + f"CREATE TABLE {destination_table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " @@ -244,65 +244,52 @@ def test_insert_quorum_with_move_partition(started_cluster, add_new_data): print("Create destination Replicated table with three replicas") zero.query(create_destination) - print( - "Stop fetches for test_insert_quorum_with_move_partition_source at first replica." - ) - first.query("SYSTEM STOP FETCHES test_insert_quorum_with_move_partition_source") + print(f"Stop fetches for {source_table_name} at first replica.") + first.query(f"SYSTEM STOP FETCHES {source_table_name}") print("Insert with quorum. (zero and second)") - zero.query( - "INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(1, '2011-01-01')" - ) + zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") zero.query( - "ALTER TABLE test_insert_quorum_with_move_partition_source MOVE PARTITION '2011-01-01' TO TABLE test_insert_quorum_with_move_partition_destination" + f"ALTER TABLE {source_table_name} MOVE PARTITION '2011-01-01' TO TABLE {destination_table_name}" ) if add_new_data: print("Insert to deleted partition") - zero.query( - "INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(2, '2011-01-01')" - ) + zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(2, '2011-01-01')") - print( - "Resume fetches for test_insert_quorum_with_move_partition_source at first replica." - ) - first.query("SYSTEM START FETCHES test_insert_quorum_with_move_partition_source") + print(f"Resume fetches for {source_table_name} at first replica.") + first.query(f"SYSTEM START FETCHES {source_table_name}") print("Sync first replica with others.") - first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_move_partition_source") + first.query(f"SYSTEM SYNC REPLICA {source_table_name}") assert "20110101" not in first.query( - """ - WITH (SELECT toString(uuid) FROM system.tables WHERE name = 'test_insert_quorum_with_move_partition_source') AS uuid, + f""" + WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{source_table_name}') AS uuid, '/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical """ ) + # Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error + second.query(f"SYSTEM SYNC REPLICA {source_table_name}") + print("Select from updated partition.") if add_new_data: assert TSV("2\t2011-01-01\n") == TSV( - zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source") + zero.query(f"SELECT * FROM {source_table_name}") ) assert TSV("2\t2011-01-01\n") == TSV( - second.query("SELECT * FROM test_insert_quorum_with_move_partition_source") + second.query(f"SELECT * FROM {source_table_name}") ) else: - assert TSV("") == TSV( - zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source") - ) - assert TSV("") == TSV( - second.query("SELECT * FROM test_insert_quorum_with_move_partition_source") - ) + assert TSV("") == TSV(zero.query(f"SELECT * FROM {source_table_name}")) + assert TSV("") == TSV(second.query(f"SELECT * FROM {source_table_name}")) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source ON CLUSTER cluster" - 
) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination ON CLUSTER cluster" - ) + zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster") + zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster") def test_insert_quorum_with_ttl(started_cluster): diff --git a/tests/integration/test_quorum_inserts_parallel/test.py b/tests/integration/test_quorum_inserts_parallel/test.py index 99548e37a54..7f8784d822c 100644 --- a/tests/integration/test_quorum_inserts_parallel/test.py +++ b/tests/integration/test_quorum_inserts_parallel/test.py @@ -27,7 +27,11 @@ def started_cluster(): def test_parallel_quorum_actually_parallel(started_cluster): - settings = {"insert_quorum": "3", "insert_quorum_parallel": "1"} + settings = { + "insert_quorum": "3", + "insert_quorum_parallel": "1", + "function_sleep_max_microseconds_per_block": "0", + } for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE r (a UInt64, b String) ENGINE=ReplicatedMergeTree('/test/r', '{num}') ORDER BY tuple()".format( diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4f98b9a0d0d..cec14b0af73 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -105,7 +105,7 @@ def test_quota_from_users_xml(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -252,7 +252,7 @@ def test_simpliest_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[]", 0, @@ -326,7 +326,7 @@ def test_tracking_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -446,7 +446,7 @@ def test_exceed_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -523,7 +523,7 @@ def test_exceed_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -613,7 +613,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -675,7 +675,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952,63113904]", 0, @@ -824,7 +824,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -914,7 +914,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[]", 0, @@ -986,7 +986,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -1048,7 +1048,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -1111,7 +1111,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1121,7 +1121,7 @@ def test_add_remove_quota(): [ "myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", - "users.xml", + "users_xml", "['client_key','user_name']", "[3600,2629746]", 0, @@ -1214,7 +1214,7 @@ def test_add_remove_quota(): [ "myQuota", 
"e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1283,7 +1283,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1346,7 +1346,7 @@ def test_reload_users_xml_by_timer(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1385,7 +1385,7 @@ def test_reload_users_xml_by_timer(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", ["user_name"], "[31556952]", 0, @@ -1554,7 +1554,7 @@ def test_query_inserts(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py index fd1d92eff92..77c8f3cf26b 100644 --- a/tests/integration/test_read_temporary_tables_on_failure/test.py +++ b/tests/integration/test_read_temporary_tables_on_failure/test.py @@ -19,7 +19,10 @@ def start_cluster(): def test_different_versions(start_cluster): with pytest.raises(QueryTimeoutExceedException): - node.query("SELECT sleepEachRow(3) FROM numbers(10)", timeout=5) + node.query( + "SELECT sleepEachRow(3) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 0", + timeout=5, + ) with pytest.raises(QueryRuntimeException): node.query("SELECT 1", settings={"max_concurrent_queries_for_user": 1}) assert node.contains_in_log("Too many simultaneous queries for user") diff --git a/tests/integration/test_rename_column/test.py b/tests/integration/test_rename_column/test.py index 8dc57cf08ff..1c87b101b11 100644 --- a/tests/integration/test_rename_column/test.py +++ b/tests/integration/test_rename_column/test.py @@ -159,7 +159,7 @@ def insert( ) elif slow: query.append( - "INSERT INTO {table_name} ({col0}, {col1}) SELECT number + sleepEachRow(0.001) AS {col0}, number + 1 AS {col1} FROM numbers_mt({chunk})".format( + "INSERT INTO {table_name} ({col0}, {col1}) SELECT number + sleepEachRow(0.001) AS {col0}, number + 1 AS {col1} FROM numbers_mt({chunk}) SETTINGS function_sleep_max_microseconds_per_block = 0".format( table_name=table_name, chunk=chunk, col0=col_names[0], @@ -198,7 +198,7 @@ def select( try: if slow: r = node.query( - "SELECT count() FROM (SELECT num2, sleepEachRow(0.5) FROM {} WHERE {} % 1000 > 0)".format( + "SELECT count() FROM (SELECT num2, sleepEachRow(0.5) FROM {} WHERE {} % 1000 > 0) SETTINGS function_sleep_max_microseconds_per_block = 0".format( table_name, col_name ) ) diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml index 16cd942e975..e598cc28d5d 100644 --- a/tests/integration/test_replicated_database/configs/config.xml +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -1,5 +1,6 @@ 10 + 1 10 diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml index 5666ffeace8..7c0e60a044e 100644 --- a/tests/integration/test_replicated_database/configs/settings.xml +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -4,6 +4,8 @@ 1 1 1 + 0 + 0 diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index ed034a326da..f45841124d9 100644 --- 
a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -672,7 +672,11 @@ def test_alters_from_different_replicas(started_cluster): def create_some_tables(db): - settings = {"distributed_ddl_task_timeout": 0} + settings = { + "distributed_ddl_task_timeout": 0, + "allow_experimental_object_type": 1, + "allow_suspicious_codecs": 1, + } main_node.query(f"CREATE TABLE {db}.t1 (n int) ENGINE=Memory", settings=settings) dummy_node.query( f"CREATE TABLE {db}.t2 (s String) ENGINE=Memory", settings=settings @@ -690,11 +694,11 @@ def create_some_tables(db): settings=settings, ) dummy_node.query( - f"CREATE TABLE {db}.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", + f"CREATE TABLE {db}.rmt2 (n int CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12))) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) main_node.query( - f"CREATE TABLE {db}.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", + f"CREATE TABLE {db}.rmt3 (n int, json Object('json') materialized '') ENGINE=ReplicatedMergeTree order by n", settings=settings, ) dummy_node.query( @@ -868,7 +872,10 @@ def test_recover_staled_replica(started_cluster): ]: assert main_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" for table in ["t2", "rmt1", "rmt2", "rmt4", "d1", "d2", "mt2", "mv1", "mv3"]: - assert dummy_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" + assert ( + dummy_node.query(f"SELECT '{table}', (*,).1 FROM recover.{table}") + == f"{table}\t42\n" + ) for table in ["m1", "mt1"]: assert dummy_node.query(f"SELECT count() FROM recover.{table}") == "0\n" global test_recover_staled_replica_run @@ -1219,7 +1226,7 @@ def test_force_synchronous_settings(started_cluster): def select_func(): dummy_node.query( - "SELECT sleepEachRow(1) FROM test_force_synchronous_settings.t" + "SELECT sleepEachRow(1) FROM test_force_synchronous_settings.t SETTINGS function_sleep_max_microseconds_per_block = 0" ) select_thread = threading.Thread(target=select_func) @@ -1255,7 +1262,7 @@ def test_recover_digest_mismatch(started_cluster): "mv /var/lib/clickhouse/metadata/recover_digest_mismatch/t1.sql /var/lib/clickhouse/metadata/recover_digest_mismatch/m1.sql", "sed --follow-symlinks -i 's/Int32/String/' /var/lib/clickhouse/metadata/recover_digest_mismatch/mv1.sql", "rm -f /var/lib/clickhouse/metadata/recover_digest_mismatch/d1.sql", - # f"rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Directory already exists + "rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Will trigger "Directory already exists" "rm -rf /var/lib/clickhouse/store", ] diff --git a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py index 68f2776e955..c30a0d86c98 100644 --- a/tests/integration/test_replicated_merge_tree_compatibility/test.py +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -9,6 +9,7 @@ node1 = cluster.add_instance( tag="20.12.4.5", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -17,6 +18,7 @@ node2 = cluster.add_instance( tag="20.12.4.5", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml deleted file mode 100644 index bb4aba94e0b..00000000000 --- 
a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml
+++ /dev/null
@@ -1,93 +0,0 @@
-<clickhouse>
-    <storage_configuration>
-        <disks>
-            <hdfs1>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse1/</endpoint>
-                <skip_access_check>true</skip_access_check>
-            </hdfs1>
-            <hdfs1_again>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse1/</endpoint>
-                <skip_access_check>true</skip_access_check>
-            </hdfs1_again>
-            <hdfs2>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse2/</endpoint>
-                <skip_access_check>true</skip_access_check>
-            </hdfs2>
-        </disks>
-        <policies>
-            <hdfs_only>
-                <volumes>
-                    <main>
-                        <disk>hdfs1</disk>
-                    </main>
-                </volumes>
-            </hdfs_only>
-            <hybrid>
-                <volumes>
-                    <main>
-                        <disk>default</disk>
-                    </main>
-                    <external>
-                        <disk>hdfs1</disk>
-                    </external>
-                </volumes>
-                <move_factor>0.0</move_factor>
-            </hybrid>
-            <tiered>
-                <volumes>
-                    <main>
-                        <disk>hdfs2</disk>
-                    </main>
-                    <external>
-                        <disk>hdfs1</disk>
-                    </external>
-                </volumes>
-            </tiered>
-            <tiered_copy>
-                <volumes>
-                    <main>
-                        <disk>hdfs1_again</disk>
-                    </main>
-                    <external>
-                        <disk>hdfs1</disk>
-                    </external>
-                </volumes>
-            </tiered_copy>
-        </policies>
-    </storage_configuration>
- - - 1024000 - 1 - true - - - - - - - node1 - 9000 - - - - - node2 - 9000 - - - - - - - test_cluster - 1 - - true -
diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py deleted file mode 100644 index eb3d62eb718..00000000000 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ /dev/null @@ -1,322 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/42561 - -pytestmark = pytest.mark.skip - -import logging -from string import Template -import time - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -from pyhdfs import HdfsClient - -SHARDS = 2 -FILES_OVERHEAD_PER_TABLE = 1 # format_version.txt -FILES_OVERHEAD_PER_PART_COMPACT = 7 - - -def wait_for_hdfs_objects(cluster, fp, expected, num_tries=30): - fs = HdfsClient(hosts=cluster.hdfs_ip) - while num_tries > 0: - num_hdfs_objects = len(fs.listdir(fp)) - if num_hdfs_objects == expected: - break - num_tries -= 1 - time.sleep(1) - assert len(fs.listdir(fp)) == expected - - -@pytest.fixture(scope="module") -def cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "node1", - main_configs=["configs/config.d/storage_conf.xml"], - macros={"replica": "node1"}, - with_zookeeper=True, - with_hdfs=True, - ) - cluster.add_instance( - "node2", - main_configs=["configs/config.d/storage_conf.xml"], - macros={"replica": "node2"}, - with_zookeeper=True, - with_hdfs=True, - ) - logging.info("Starting cluster...") - cluster.start() - if cluster.instances["node1"].is_debug_build(): - # https://github.com/ClickHouse/ClickHouse/issues/27814 - pytest.skip( - "libhdfs3 calls rand function which does not pass harmful check in debug build" - ) - logging.info("Cluster started") - - fs = HdfsClient(hosts=cluster.hdfs_ip) - fs.mkdirs("/clickhouse1") - fs.mkdirs("/clickhouse2") - logging.info("Created HDFS directory") - - yield cluster - finally: - cluster.shutdown() - - -def test_hdfs_zero_copy_replication_insert(cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - """ - CREATE TABLE hdfs_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hdfs_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='hdfs_only' - """ - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", SHARDS * FILES_OVERHEAD_PER_TABLE - ) - - node1.query("INSERT INTO hdfs_test VALUES (now() - INTERVAL 3 DAY, 10)") - node2.query("SYSTEM SYNC REPLICA hdfs_test", timeout=30) - assert node1.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" - assert node2.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" - assert ( - node1.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" - ) - assert ( - node2.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - wait_for_hdfs_objects( - cluster, - "/clickhouse1", - SHARDS * FILES_OVERHEAD_PER_TABLE + FILES_OVERHEAD_PER_PART_COMPACT, - ) - finally: - node1.query("DROP TABLE IF EXISTS hdfs_test SYNC") - node2.query("DROP TABLE IF EXISTS hdfs_test SYNC") - - -@pytest.mark.parametrize( - ("storage_policy", "init_objects"), - [("hybrid", 0), 
("tiered", 0), ("tiered_copy", FILES_OVERHEAD_PER_TABLE)], -) -def test_hdfs_zero_copy_replication_single_move(cluster, storage_policy, init_objects): - node1 = cluster.instances["node1"] - try: - node1.query( - Template( - """ - CREATE TABLE single_node_move_test (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/single_node_move_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='$policy',temporary_directories_lifetime=1 - """ - ).substitute(policy=storage_policy) - ) - wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) - - node1.query( - "INSERT INTO single_node_move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - - node1.query( - "ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='single_node_move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - - node1.query( - "ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'main'" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS single_node_move_test SYNC") - - -@pytest.mark.parametrize( - ("storage_policy", "init_objects"), - [("hybrid", 0), ("tiered", 0), ("tiered_copy", SHARDS * FILES_OVERHEAD_PER_TABLE)], -) -def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - Template( - """ - CREATE TABLE move_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/move_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='$policy' - """ - ).substitute(policy=storage_policy) - ) - wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) - - node1.query( - "INSERT INTO move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" - ) - node2.query("SYSTEM SYNC REPLICA move_test", timeout=30) - - assert ( - node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - - node1.query( - "ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - - node2.query( - "ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT 
- ) - finally: - node1.query("DROP TABLE IF EXISTS move_test SYNC") - node2.query("DROP TABLE IF EXISTS move_test SYNC") - - -@pytest.mark.parametrize(("storage_policy"), ["hybrid", "tiered", "tiered_copy"]) -def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - Template( - """ - CREATE TABLE ttl_move_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_move_test', '{replica}') - ORDER BY (dt, id) - TTL dt + INTERVAL 2 DAY TO VOLUME 'external' - SETTINGS storage_policy='$policy' - """ - ).substitute(policy=storage_policy) - ) - - node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 3 DAY, 10)") - node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 1 DAY, 11)") - - node1.query("OPTIMIZE TABLE ttl_move_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) - - assert_eq_with_retry(node1, "SELECT count() FROM ttl_move_test", "2") - assert_eq_with_retry(node2, "SELECT count() FROM ttl_move_test", "2") - assert ( - node1.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") - == "(10),(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS ttl_move_test SYNC") - node2.query("DROP TABLE IF EXISTS ttl_move_test SYNC") - - -def test_hdfs_zero_copy_with_ttl_delete(cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - """ - CREATE TABLE ttl_delete_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_delete_test', '{replica}') - ORDER BY (dt, id) - TTL dt + INTERVAL 2 DAY - SETTINGS storage_policy='tiered' - """ - ) - - node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 3 DAY, 10)") - node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 1 DAY, 11)") - - node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) - - assert_eq_with_retry(node1, "SELECT count() FROM ttl_delete_test", "1") - assert_eq_with_retry(node2, "SELECT count() FROM ttl_delete_test", "1") - - assert ( - node1.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") - == "(11)" - ) - assert ( - node2.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") - == "(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") - node2.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml new file mode 100644 index 00000000000..8ff3bdf9a2f --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml @@ -0,0 +1,5 @@ + + + 30000 + + diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py new 
file mode 100644 index 00000000000..67dd03098e9 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +from multiprocessing.dummy import Pool +import time + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + main_configs=["config/merge_tree_conf.xml"], + with_zookeeper=True, + stay_alive=True, +) + +node2 = cluster.add_instance( + "node2", + main_configs=["config/merge_tree_conf.xml"], + with_zookeeper=True, + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_shutdown_and_wait(start_cluster): + for i, node in enumerate([node1, node2]): + node.query( + f"CREATE TABLE test_table (value UInt64) ENGINE=ReplicatedMergeTree('/test/table', 'r{i}') ORDER BY tuple()" + ) + + node1.query("INSERT INTO test_table VALUES (0)") + node2.query("SYSTEM SYNC REPLICA test_table") + + assert node1.query("SELECT * FROM test_table") == "0\n" + assert node2.query("SELECT * FROM test_table") == "0\n" + + def soft_shutdown(node): + node.stop_clickhouse(kill=False, stop_wait_sec=60) + + p = Pool(50) + + def insert(value): + node1.query(f"INSERT INTO test_table VALUES ({value})") + + with PartitionManager() as pm: + pm.partition_instances(node1, node2) + p.map(insert, range(1, 50)) + + # Start shutdown async + waiter = p.apply_async(soft_shutdown, (node1,)) + # to be sure that shutdown started + time.sleep(5) + + # node 2 partitioned and don't see any data + assert node2.query("SELECT * FROM test_table") == "0\n" + + # Restore network + pm.heal_all() + + # wait for shutdown to finish + waiter.get() + + node2.query("SYSTEM SYNC REPLICA test_table", timeout=5) + + # check second replica has all data + assert node2.query("SELECT sum(value) FROM test_table") == "1225\n" + # and nothing in queue + assert node2.query("SELECT count() FROM system.replication_queue") == "0\n" + + # It can happend that the second replica is superfast + assert node1.contains_in_log( + "Successfully waited all the parts" + ) or node1.contains_in_log("All parts found on replica") diff --git a/tests/integration/test_replicated_mutations/configs/users.xml b/tests/integration/test_replicated_mutations/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_replicated_mutations/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_replicated_mutations/test.py b/tests/integration/test_replicated_mutations/test.py index 7479f082b06..e20bcf367e3 100644 --- a/tests/integration/test_replicated_mutations/test.py +++ b/tests/integration/test_replicated_mutations/test.py @@ -15,6 +15,7 @@ node2 = cluster.add_instance( "node2", macros={"cluster": "test1"}, main_configs=["configs/merge_tree.xml"], + user_configs=["configs/users.xml"], with_zookeeper=True, ) @@ -22,12 +23,14 @@ node3 = cluster.add_instance( "node3", macros={"cluster": "test2"}, main_configs=["configs/merge_tree_max_parts.xml"], + user_configs=["configs/users.xml"], with_zookeeper=True, ) node4 = cluster.add_instance( "node4", macros={"cluster": "test2"}, main_configs=["configs/merge_tree_max_parts.xml"], + user_configs=["configs/users.xml"], with_zookeeper=True, ) @@ -35,6 +38,7 @@ node5 = cluster.add_instance( "node5", 
macros={"cluster": "test3"}, main_configs=["configs/merge_tree_max_parts.xml"], + user_configs=["configs/users.xml"], ) all_nodes = [node1, node2, node3, node4, node5] diff --git a/tests/integration/test_replicated_user_defined_functions/test.py b/tests/integration/test_replicated_user_defined_functions/test.py index c0990819bf4..f54be21c4c0 100644 --- a/tests/integration/test_replicated_user_defined_functions/test.py +++ b/tests/integration/test_replicated_user_defined_functions/test.py @@ -1,10 +1,12 @@ import inspect +from contextlib import nullcontext as does_not_raise import pytest import time import os.path from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException from helpers.test_tools import assert_eq_with_retry, TSV SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -83,6 +85,33 @@ def test_create_and_drop(): node1.query("DROP FUNCTION f1") +@pytest.mark.parametrize( + "ignore, expected_raise", + [("true", does_not_raise()), ("false", pytest.raises(QueryRuntimeException))], +) +def test_create_and_drop_udf_on_cluster(ignore, expected_raise): + node1.replace_config( + "/etc/clickhouse-server/users.d/users.xml", + inspect.cleandoc( + f""" + + + + {ignore} + + + + """ + ), + ) + node1.query("SYSTEM RELOAD CONFIG") + + with expected_raise: + node1.query("CREATE FUNCTION f1 ON CLUSTER default AS (x, y) -> x + y") + assert node1.query("SELECT f1(12, 3)") == "15\n" + node1.query("DROP FUNCTION f1 ON CLUSTER default") + + def test_create_and_replace(): node1.query("CREATE FUNCTION f1 AS (x, y) -> x + y") assert node1.query("SELECT f1(12, 3)") == "15\n" diff --git a/tests/integration/test_replicated_users/test.py b/tests/integration/test_replicated_users/test.py index a7dbaf6ed30..489724ed4fb 100644 --- a/tests/integration/test_replicated_users/test.py +++ b/tests/integration/test_replicated_users/test.py @@ -1,3 +1,4 @@ +import inspect import pytest import time @@ -82,6 +83,37 @@ def test_create_replicated_on_cluster(started_cluster, entity): node1.query(f"DROP {entity.keyword} {entity.name} {entity.options}") +@pytest.mark.parametrize("entity", entities, ids=get_entity_id) +def test_create_replicated_on_cluster_ignore(started_cluster, entity): + node1.replace_config( + "/etc/clickhouse-server/users.d/users.xml", + inspect.cleandoc( + f""" + + + + true + + + + """ + ), + ) + node1.query("SYSTEM RELOAD CONFIG") + + node1.query( + f"CREATE {entity.keyword} {entity.name} ON CLUSTER default {entity.options}" + ) + assert ( + f"cannot insert because {entity.keyword.lower()} `{entity.name}{entity.options}` already exists in replicated" + in node2.query_and_get_error_with_retry( + f"CREATE {entity.keyword} {entity.name} {entity.options}" + ) + ) + + node1.query(f"DROP {entity.keyword} {entity.name} {entity.options}") + + @pytest.mark.parametrize("entity", entities, ids=get_entity_id) def test_create_replicated_if_not_exists_on_cluster(started_cluster, entity): node1.query( diff --git a/tests/integration/test_replicated_zero_copy_projection_mutation/__init__.py b/tests/integration/test_replicated_zero_copy_projection_mutation/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_zero_copy_projection_mutation/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_zero_copy_projection_mutation/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..44d043b944f --- /dev/null +++ 
b/tests/integration/test_replicated_zero_copy_projection_mutation/configs/config.d/storage_conf.xml @@ -0,0 +1,28 @@ + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + true + + + + + +
+ s3 +
+
+
+
+
+ + + 1 + + +
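The test_create_and_drop_udf_on_cluster change above parametrizes the expected outcome as a context manager (contextlib.nullcontext aliased to does_not_raise versus pytest.raises), so a single test body covers both the "ignored" and the "rejected" ON CLUSTER DDL paths. A minimal standalone sketch of that pattern, assuming only pytest; the divide() function and its parameter values are illustrative and not part of the test suite:

from contextlib import nullcontext as does_not_raise

import pytest


def divide(a, b):
    return a / b


@pytest.mark.parametrize(
    "divisor, expectation",
    [
        (2, does_not_raise()),
        (0, pytest.raises(ZeroDivisionError)),
    ],
)
def test_divide(divisor, expectation):
    # One body handles both outcomes: the parametrized context manager
    # decides whether an exception is expected here.
    with expectation:
        assert divide(10, divisor) == 5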
diff --git a/tests/integration/test_replicated_zero_copy_projection_mutation/configs/config.d/users.xml b/tests/integration/test_replicated_zero_copy_projection_mutation/configs/config.d/users.xml new file mode 100644 index 00000000000..246de9ecb96 --- /dev/null +++ b/tests/integration/test_replicated_zero_copy_projection_mutation/configs/config.d/users.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_replicated_zero_copy_projection_mutation/test.py b/tests/integration/test_replicated_zero_copy_projection_mutation/test.py new file mode 100644 index 00000000000..1b68aac08a7 --- /dev/null +++ b/tests/integration/test_replicated_zero_copy_projection_mutation/test.py @@ -0,0 +1,355 @@ +import logging +import time +from contextlib import contextmanager +import pathlib + +import pytest + +from helpers.mock_servers import start_s3_mock +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + + +def args_to_dict(**kwargs): + return kwargs + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + + kwargs = args_to_dict( + main_configs=[ + "configs/config.d/storage_conf.xml", + ], + user_configs=[ + "configs/config.d/users.xml", + ], + with_minio=True, + with_zookeeper=True, + stay_alive=True, + ) + + cluster.add_instance("node1", **kwargs) + cluster.add_instance("node2", **kwargs) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(scope="module") +def all_cluster_nodes(cluster): + yield cluster.instances.values() + + +@pytest.fixture(scope="module") +def first_cluster_node(cluster): + yield cluster.instances["node1"] + + +@pytest.fixture(scope="module") +def second_cluster_node(cluster): + yield cluster.instances["node2"] + + +@pytest.fixture(scope="module") +def init_broken_s3(cluster): + yield start_s3_mock(cluster, "broken_s3", "8081") + + +@pytest.fixture(scope="function") +def broken_s3(init_broken_s3): + init_broken_s3.reset() + yield init_broken_s3 + + +def list_objects(cluster, path="data/", hint="list_objects"): + minio = cluster.minio_client + objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) + names = [x.object_name for x in objects] + names.sort() + logging.info(f"{hint} ({len(objects)}): {names}") + return names + + +def wait_for_delete_s3_objects(cluster, expected, timeout=30): + while timeout > 0: + if len(list_objects(cluster, "data/")) == expected: + return + timeout -= 1 + time.sleep(1) + final_listing = list_objects(cluster, "data/") + assert len(final_listing) == expected, ",".join(final_listing) + + +def remove_all_s3_objects(cluster): + minio = cluster.minio_client + for obj in list_objects(cluster, "data/"): + minio.remove_object(cluster.minio_bucket, obj) + + +@pytest.fixture(autouse=True, scope="function") +def clear_minio(cluster): + try: + # CH do some writes to the S3 at start. For example, file data/clickhouse_access_check_{server_uuid}. + # Set the timeout there as 10 sec in order to resolve the race with that file exists. 
+ wait_for_delete_s3_objects(cluster, 0, timeout=10) + except: + # Remove extra objects to prevent tests cascade failing + remove_all_s3_objects(cluster) + + yield + + +@contextmanager +def drop_table_guard(nodes, table): + for node in nodes: + node.query(f"DROP TABLE IF EXISTS {table} SYNC") + try: + yield + finally: + for node in nodes: + node.query(f"DROP TABLE IF EXISTS {table} SYNC") + + +def test_all_projection_files_are_dropped_when_part_is_dropped( + cluster, first_cluster_node +): + node = first_cluster_node + + with drop_table_guard([node], "test_all_projection_files_are_dropped"): + node.query( + """ + CREATE TABLE test_all_projection_files_are_dropped(a UInt32, b UInt32) + ENGINE MergeTree() + ORDER BY a + SETTINGS storage_policy='s3', old_parts_lifetime=0 + """ + ) + + objects_empty_table = list_objects(cluster) + + node.query( + "ALTER TABLE test_all_projection_files_are_dropped ADD projection b_order (SELECT a, b ORDER BY b)" + ) + node.query( + "ALTER TABLE test_all_projection_files_are_dropped MATERIALIZE projection b_order" + ) + + node.query( + """ + INSERT INTO test_all_projection_files_are_dropped + VALUES (1, 105), (5, 101), (3, 103), (4, 102), (2, 104) + """ + ) + + node.query( + "ALTER TABLE test_all_projection_files_are_dropped DROP PARTITION ID 'all'" + ) + + objects_at_the_end = list_objects(cluster) + assert objects_at_the_end == objects_empty_table + + +def test_hardlinks_preserved_when_projection_dropped( + cluster, all_cluster_nodes, first_cluster_node, second_cluster_node +): + with drop_table_guard( + all_cluster_nodes, "test_hardlinks_preserved_when_projection_dropped" + ): + create_query = """ + CREATE TABLE test_hardlinks_preserved_when_projection_dropped + ( + a UInt32, + b UInt32, + c UInt32, + PROJECTION projection_order_by_b + ( + SELECT a, b ORDER BY b + ) + ) + ENGINE ReplicatedMergeTree('/clickhouse/tables/test_projection', '{instance}') + ORDER BY a + """ + + first_node_settings = """ + SETTINGS + storage_policy='s3', + old_parts_lifetime=0 + """ + + # big old_parts_lifetime value makes second node to hold outdated part for us, we make it as broken_on_start + second_node_settings = """ + SETTINGS + storage_policy='s3', + old_parts_lifetime=10000 + """ + + first_cluster_node.query(create_query + first_node_settings) + second_cluster_node.query(create_query + second_node_settings) + + objects_empty_table = list_objects(cluster) + + first_cluster_node.query("SYSTEM FLUSH LOGS") + table_uuid = first_cluster_node.query( + """ + SELECT uuid FROM system.tables + WHERE name = 'test_hardlinks_preserved_when_projection_dropped' + """ + ).strip() + + first_cluster_node.query( + """ + INSERT INTO test_hardlinks_preserved_when_projection_dropped + VALUES (1, 105, 1), (5, 101, 1), (3, 103, 1), (4, 102, 1), (2, 104, 1) + """ + ) + + # second_cluster_node will fetch the mutated part when it is ready on first_cluster_node + second_cluster_node.query("SYSTEM STOP MERGES") + + first_cluster_node.query( + """ + ALTER TABLE test_hardlinks_preserved_when_projection_dropped + UPDATE c = 2 where c = 1 + """, + settings={"mutations_sync": "1"}, + ) + + assert_eq_with_retry( + first_cluster_node, "SELECT COUNT() FROM system.replication_queue", "0" + ) + + # the mutated part is ready on first_cluster_node, second replica just fetches it + second_cluster_node.query("SYSTEM START MERGES") + + # fist node removed outdated part + assert_eq_with_retry( + first_cluster_node, + """ + SELECT removal_state FROM system.parts + WHERE name = 'all_0_0_0' + AND table = 
'test_hardlinks_preserved_when_projection_dropped' + AND not active + """, + "", + retry_count=300, + sleep_time=1, + ) + + # make sure that alter update made hardlinks inside projection + hardlinks = ( + first_cluster_node.query( + f""" + SELECT value + FROM system.zookeeper + WHERE + path like '/clickhouse/zero_copy/zero_copy_s3/{table_uuid}' AND name = 'all_0_0_0' + """, + settings={"allow_unrestricted_reads_from_keeper": "1"}, + ) + .strip() + .split() + ) + assert len(hardlinks) > 0, ",".join(hardlinks) + assert any(["proj/" in x for x in hardlinks]), ",".join(hardlinks) + + part_path_on_second_node = second_cluster_node.query( + """ + SELECT path FROM system.parts + WHERE + name = 'all_0_0_0' AND table = 'test_hardlinks_preserved_when_projection_dropped' + """ + ).strip() + + # that corrupts outdatated part all_0_0_0 + script = ( + f"INDEX_FILE={part_path_on_second_node}/primary.cidx" + """ + cp $INDEX_FILE $INDEX_FILE.backup + echo "unexpected data in metadata file" | cat > $INDEX_FILE + """ + ) + second_cluster_node.exec_in_container(["bash", "-c", script]) + + # corrupted outdatated part all_0_0_0 is detached as broken_on_start + second_cluster_node.restart_clickhouse() + + second_cluster_node.query( + "SYSTEM WAIT LOADING PARTS test_hardlinks_preserved_when_projection_dropped" + ) + + second_cluster_node.query("SYSTEM FLUSH LOGS") + + # make sure there is outdated broken-on-start part + broken_parts = ( + second_cluster_node.query( + """ + SELECT name, reason, path FROM system.detached_parts + WHERE + table = 'test_hardlinks_preserved_when_projection_dropped' + """ + ) + .strip() + .split("\n") + ) + assert len(broken_parts) == 1, broken_parts + # style checker black asked to do this. It is crazy + broken_part_name, reason, broken_part_path_on_second_node = broken_parts[ + 0 + ].split("\t") + assert "broken-on-start" == reason + + script = ( + f"INDEX_FILE={broken_part_path_on_second_node}/primary.cidx" + """ + mv $INDEX_FILE.backup $INDEX_FILE + """ + ) + second_cluster_node.exec_in_container(["bash", "-c", script]) + + # when detached part is removed, removeSharedRecursive is called + second_cluster_node.query( + f""" + ALTER TABLE test_hardlinks_preserved_when_projection_dropped + DROP DETACHED PART '{broken_part_name}' + """, + settings={"allow_drop_detached": "1"}, + ) + + # it is an easy way to read all data in part + # "0" means corrupted, https://clickhouse.com/docs/en/sql-reference/statements/check-table + assert ( + "1" + == first_cluster_node.query( + """ + CHECK TABLE test_hardlinks_preserved_when_projection_dropped + """ + ).strip() + ) + + assert ( + "1" + == second_cluster_node.query( + """ + CHECK TABLE test_hardlinks_preserved_when_projection_dropped + """ + ).strip() + ) + + second_cluster_node.query( + f""" + ALTER TABLE test_hardlinks_preserved_when_projection_dropped + DROP PART 'all_0_0_0_1' + """, + settings={"alter_sync": 2}, + ) + + wait_for_delete_s3_objects(cluster, len(objects_empty_table)) + + objects_at_the_end = list_objects(cluster) + assert objects_at_the_end == objects_empty_table diff --git a/tests/integration/test_replicating_constants/test.py b/tests/integration/test_replicating_constants/test.py index 82cc5e757f1..00781e473c7 100644 --- a/tests/integration/test_replicating_constants/test.py +++ b/tests/integration/test_replicating_constants/test.py @@ -11,6 +11,7 @@ node2 = cluster.add_instance( image="yandex/clickhouse-server", tag="19.1.14", with_installed_binary=True, + allow_analyzer=False, ) diff --git 
a/tests/integration/test_restart_server/test.py b/tests/integration/test_restart_server/test.py index 180f8240d01..43f3056533f 100755 --- a/tests/integration/test_restart_server/test.py +++ b/tests/integration/test_restart_server/test.py @@ -5,7 +5,7 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance("node", stay_alive=True) -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def start_cluster(): try: cluster.start() @@ -14,9 +14,23 @@ def start_cluster(): cluster.shutdown() -def test_drop_memory_database(start_cluster): +def test_drop_memory_database(): node.query("CREATE DATABASE test ENGINE Memory") node.query("CREATE TABLE test.test_table(a String) ENGINE Memory") node.query("DROP DATABASE test") node.restart_clickhouse(kill=True) assert node.query("SHOW DATABASES LIKE 'test'").strip() == "" + + +def test_flushes_async_insert_queue(): + node.query( + """ + CREATE TABLE flush_test (a String, b UInt64) ENGINE = MergeTree ORDER BY a; + SET async_insert = 1; + SET wait_for_async_insert = 0; + SET async_insert_busy_timeout_ms = 1000000; + INSERT INTO flush_test VALUES ('world', 23456); + """ + ) + node.restart_clickhouse() + assert node.query("SELECT * FROM flush_test") == "world\t23456\n" diff --git a/tests/integration/test_restore_replica/test.py b/tests/integration/test_restore_replica/test.py index 31c503f6184..3d0baa87c21 100644 --- a/tests/integration/test_restore_replica/test.py +++ b/tests/integration/test_restore_replica/test.py @@ -44,6 +44,18 @@ def fill_table(): check_data(499500, 1000) +# kazoo.delete may throw NotEmptyError on concurrent modifications of the path +def zk_rmr_with_retries(zk, path): + for i in range(1, 10): + try: + zk.delete(path, recursive=True) + return + except Exception as ex: + print(ex) + time.sleep(0.5) + assert False + + @pytest.fixture(scope="module") def start_cluster(): try: @@ -84,7 +96,7 @@ def test_restore_replica_sequential(start_cluster): fill_table() print("Deleting root ZK path metadata") - zk.delete("/clickhouse/tables/test", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test") assert zk.exists("/clickhouse/tables/test") is None node_1.query("SYSTEM RESTART REPLICA test") @@ -119,7 +131,7 @@ def test_restore_replica_parallel(start_cluster): fill_table() print("Deleting root ZK path metadata") - zk.delete("/clickhouse/tables/test", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test") assert zk.exists("/clickhouse/tables/test") is None node_1.query("SYSTEM RESTART REPLICA test") @@ -147,12 +159,12 @@ def test_restore_replica_alive_replicas(start_cluster): fill_table() print("Deleting replica2 path, trying to restore replica1") - zk.delete("/clickhouse/tables/test/replicas/replica2", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test/replicas/replica2") assert zk.exists("/clickhouse/tables/test/replicas/replica2") is None node_1.query_and_get_error("SYSTEM RESTORE REPLICA test") print("Deleting replica1 path, trying to restore replica1") - zk.delete("/clickhouse/tables/test/replicas/replica1", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test/replicas/replica1") assert zk.exists("/clickhouse/tables/test/replicas/replica1") is None node_1.query("SYSTEM RESTART REPLICA test") diff --git a/tests/integration/test_reverse_dns_query/configs/listen_host.xml b/tests/integration/test_reverse_dns_query/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_reverse_dns_query/configs/listen_host.xml +++ 
b/tests/integration/test_reverse_dns_query/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - +
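Several hunks above replace one-shot calls with bounded retries: zk_rmr_with_retries guards kazoo's delete against NotEmptyError raised by concurrent writers, and wait_for_delete_s3_objects polls the MinIO listing until it reaches the expected size. A generic sketch of that retry pattern, with illustrative names only:

import time


def retry(action, attempts=10, delay=0.5):
    # Run `action` until it succeeds, sleeping between attempts; surface the
    # last error if it never does (mirrors zk_rmr_with_retries and the S3 waiters).
    last_error = None
    for _ in range(attempts):
        try:
            return action()
        except Exception as ex:
            last_error = ex
            time.sleep(delay)
    raise AssertionError(f"gave up after {attempts} attempts: {last_error}")


# e.g. retry(lambda: zk.delete("/clickhouse/tables/test", recursive=True))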
diff --git a/tests/integration/test_reverse_dns_query/coredns_config/Corefile b/tests/integration/test_reverse_dns_query/coredns_config/Corefile index 84d297f7cdf..3edf37dafa5 100644 --- a/tests/integration/test_reverse_dns_query/coredns_config/Corefile +++ b/tests/integration/test_reverse_dns_query/coredns_config/Corefile @@ -1,4 +1,8 @@ . { + hosts /example.com { + reload "20ms" + fallthrough + } forward . 127.0.0.11 log } diff --git a/tests/integration/test_reverse_dns_query/coredns_config/example.com b/tests/integration/test_reverse_dns_query/coredns_config/example.com new file mode 100644 index 00000000000..6c6e4cbee2e --- /dev/null +++ b/tests/integration/test_reverse_dns_query/coredns_config/example.com @@ -0,0 +1 @@ +filled in runtime, but needs to exist in order to be volume mapped in docker diff --git a/tests/integration/test_reverse_dns_query/test.py b/tests/integration/test_reverse_dns_query/test.py index c5d4304a5d2..00c3956f74f 100644 --- a/tests/integration/test_reverse_dns_query/test.py +++ b/tests/integration/test_reverse_dns_query/test.py @@ -1,4 +1,5 @@ import pytest +import socket from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check from time import sleep import os @@ -30,6 +31,28 @@ def started_cluster(): cluster.shutdown() +def check_ptr_record(ip, hostname): + try: + host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) + if hostname.lower() == host.lower(): + return True + except socket.herror: + pass + return False + + +def setup_dns_server(ip): + domains_string = "test.example.com" + example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' + run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) + + # DNS server takes time to reload the configuration. 
+ for try_num in range(10): + if all(check_ptr_record(ip, host) for host in domains_string.split()): + break + sleep(1) + + def setup_ch_server(dns_server_ip): ch_server.exec_in_container( (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) @@ -42,9 +65,10 @@ def setup_ch_server(dns_server_ip): def test_reverse_dns_query(started_cluster): dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - + random_ipv6 = "4ae8:fa0f:ee1d:68c5:0b76:1b79:7ae6:1549" # https://commentpicker.com/ip-address-generator.php + setup_dns_server(random_ipv6) setup_ch_server(dns_server_ip) for _ in range(0, 200): - response = ch_server.query("select reverseDNSQuery('2001:4860:4860::8888')") - assert response == "['dns.google']\n" + response = ch_server.query(f"select reverseDNSQuery('{random_ipv6}')") + assert response == "['test.example.com']\n" diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9a352f43eab..0e1bbea9cea 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -287,7 +287,7 @@ def test_introspection(): assert instance.query( "SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name" - ) == TSV([["R1", "local directory"], ["R2", "local directory"]]) + ) == TSV([["R1", "local_directory"], ["R2", "local_directory"]]) assert instance.query( "SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, database, table, column, is_partial_revoke, grant_option" @@ -301,7 +301,7 @@ def test_introspection(): ) assert instance.query( - "SELECT * from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name" + "SELECT user_name, role_name, granted_role_name, granted_role_is_default, with_admin_option from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name" ) == TSV([["A", "\\N", "R1", 1, 0], ["B", "\\N", "R2", 1, 1]]) assert instance.query( diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 1933823f5d2..d88a8f2a243 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -420,7 +420,7 @@ def test_introspection(): "mydb", "local", "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -433,7 +433,7 @@ def test_introspection(): "mydb", "filtered_table1", "9e8a8f62-4965-2b5e-8599-57c7b99b3549", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -446,7 +446,7 @@ def test_introspection(): "mydb", "filtered_table2", "cffae79d-b9bf-a2ef-b798-019c18470b25", - "users.xml", + "users_xml", "a + b < 1 or c - d > 5", 0, 0, @@ -459,7 +459,7 @@ def test_introspection(): "mydb", "filtered_table3", "12fc5cef-e3da-3940-ec79-d8be3911f42b", - "users.xml", + "users_xml", "c = 1", 0, 0, @@ -636,7 +636,9 @@ def test_grant_create_row_policy(): assert node.query("SHOW POLICIES") == "" node.query("CREATE USER X") - expected_error = "necessary to have grant CREATE ROW POLICY ON mydb.filtered_table1" + expected_error = ( + "necessary to have the grant CREATE ROW POLICY ON mydb.filtered_table1" + ) assert expected_error in node.query_and_get_error( "CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a + + + 2 + + + 496 + + diff --git a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_mark_cache.xml 
b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_mark_cache.xml new file mode 100644 index 00000000000..2613b4bbeee --- /dev/null +++ b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_mark_cache.xml @@ -0,0 +1,5 @@ + + + 248 + + diff --git a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml new file mode 100644 index 00000000000..6f2de0fa8f5 --- /dev/null +++ b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_runtime_configurable_cache_size/test.py b/tests/integration/test_runtime_configurable_cache_size/test.py new file mode 100644 index 00000000000..6119ff1ebea --- /dev/null +++ b/tests/integration/test_runtime_configurable_cache_size/test.py @@ -0,0 +1,144 @@ +import os +import pytest +import shutil +import time +from helpers.cluster import ClickHouseCluster + +# Tests that sizes of in-memory caches (mark / uncompressed / index mark / index uncompressed / mmapped file / query cache) can be changed +# at runtime (issue #51085). This file tests only the mark cache (which uses the SLRU cache policy) and the query cache (which uses the TTL +# cache policy). As such, both tests are representative for the other caches. + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/default.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs") + + +# temporarily disabled due to https://github.com/ClickHouse/ClickHouse/pull/51446#issuecomment-1687066351 +# def test_mark_cache_size_is_runtime_configurable(start_cluster): +# # the initial config specifies the mark cache size as 496 bytes, just enough to hold two marks +# node.query("SYSTEM DROP MARK CACHE") +# +# node.query("CREATE TABLE test1 (val String) ENGINE=MergeTree ORDER BY val") +# node.query("INSERT INTO test1 VALUES ('abc') ('def') ('ghi')") +# node.query("SELECT * FROM test1 WHERE val = 'def'") # cache 1st mark +# +# node.query("CREATE TABLE test2 (val String) ENGINE=MergeTree ORDER BY val") +# node.query("INSERT INTO test2 VALUES ('abc') ('def') ('ghi')") +# node.query("SELECT * FROM test2 WHERE val = 'def'") # cache 2nd mark +# +# # Result checking is based on asynchronous metrics. These are calculated by default every 1.0 sec, and this is also the +# # smallest possible value. Found no statement to force-recalculate them, therefore waaaaait... 
+# time.sleep(2.0) +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheFiles'" +# ) +# assert res == "2\n" +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheBytes'" +# ) +# assert res == "496\n" +# +# # switch to a config with a mark cache size of 248 bytes +# node.copy_file_to_container( +# os.path.join(CONFIG_DIR, "smaller_mark_cache.xml"), +# "/etc/clickhouse-server/config.d/default.xml", +# ) +# +# node.query("SYSTEM RELOAD CONFIG") +# +# # check that eviction worked as expected +# time.sleep(2.0) +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheFiles'" +# ) +# assert res == "1\n" +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheBytes'" +# ) +# assert res == "248\n" +# +# # check that the new mark cache maximum size is respected when more marks are cached +# node.query("CREATE TABLE test3 (val String) ENGINE=MergeTree ORDER BY val") +# node.query("INSERT INTO test3 VALUES ('abc') ('def') ('ghi')") +# node.query("SELECT * FROM test3 WHERE val = 'def'") +# time.sleep(2.0) +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheFiles'" +# ) +# assert res == "1\n" +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheBytes'" +# ) +# assert res == "248\n" +# +# # restore the original config +# node.copy_file_to_container( +# os.path.join(CONFIG_DIR, "default.xml"), +# "/etc/clickhouse-server/config.d/default.xml", +# ) + + +def test_query_cache_size_is_runtime_configurable(start_cluster): + # the inital config specifies the maximum query cache size as 2, run 3 queries, expect 2 cache entries + node.query("SYSTEM DROP QUERY CACHE") + node.query("SELECT 1 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + + time.sleep(2.0) + res = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'" + ) + assert res == "2\n" + + # switch to a config with a maximum query cache size of 1 + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "smaller_query_cache.xml"), + "/etc/clickhouse-server/config.d/default.xml", + ) + + node.query("SYSTEM RELOAD CONFIG") + + # check that eviction worked as expected + time.sleep(2.0) + res = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'" + ) + assert ( + res == "2\n" + ) # "Why not 1?", you think. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert. + # Not a real issue, can be changed later, at least there's a test now. + + # Also, you may also wonder "why query_cache_ttl = 1"? Reason is that TTLCachePolicy only removes *stale* entries. With the default TTL + # (60 sec), no entries would be removed at all. Again: not a real issue, can be changed later and there's at least a test now. 
+ + # check that the new query cache maximum size is respected when more queries run + node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + time.sleep(2.0) + res = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'" + ) + assert res == "1\n" + + # restore the original config + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "default.xml"), + "/etc/clickhouse-server/config.d/default.xml", + ) diff --git a/tests/integration/test_s3_storage_conf_new_proxy/__init__.py b/tests/integration/test_s3_storage_conf_new_proxy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml new file mode 100644 index 00000000000..af5687d88ac --- /dev/null +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml @@ -0,0 +1,8 @@ + + + + http://proxy1 + http://proxy2 + + + \ No newline at end of file diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..94ac83b32ac --- /dev/null +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml @@ -0,0 +1,21 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + + +
+ <disk>s3</disk> + </main>
+ </volumes>
+ </s3>
+ </policies>
+ </storage_configuration>
+ </clickhouse>
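The runtime-configurable cache test above waits a fixed two seconds because system.asynchronous_metrics is recalculated roughly once per second and cannot be forced. A hypothetical helper, not part of the change set, that expresses the same wait as a bounded poll; node is assumed to be a ClickHouseCluster instance as used throughout these tests:

import time


def wait_for_async_metric(node, metric, expected, timeout=10.0, interval=0.5):
    # Poll system.asynchronous_metrics until `metric` reports `expected`
    # (compared as a stripped string) instead of sleeping for a fixed interval.
    deadline = time.time() + timeout
    while time.time() < deadline:
        value = node.query(
            f"SELECT value FROM system.asynchronous_metrics WHERE metric = '{metric}'"
        ).strip()
        if value == expected:
            return
        time.sleep(interval)
    raise AssertionError(f"{metric} did not reach {expected} within {timeout}s")

With such a helper, each sleep-then-assert pair would become a call like wait_for_async_metric(node, "QueryCacheEntries", "2"), at the cost of a few extra queries per check.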
diff --git a/tests/integration/test_s3_with_proxy/configs/config.d/users.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/users.xml similarity index 100% rename from tests/integration/test_s3_with_proxy/configs/config.d/users.xml rename to tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/users.xml diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.xml new file mode 100644 index 00000000000..f4be5ab6b7c --- /dev/null +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.xml @@ -0,0 +1,7 @@ + + 9000 + 127.0.0.1 + 500 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_s3_with_proxy/proxy-resolver/resolver.py b/tests/integration/test_s3_storage_conf_new_proxy/proxy-resolver/resolver.py similarity index 100% rename from tests/integration/test_s3_with_proxy/proxy-resolver/resolver.py rename to tests/integration/test_s3_storage_conf_new_proxy/proxy-resolver/resolver.py diff --git a/tests/integration/test_s3_storage_conf_new_proxy/test.py b/tests/integration/test_s3_storage_conf_new_proxy/test.py new file mode 100644 index 00000000000..c98eb05a217 --- /dev/null +++ b/tests/integration/test_s3_storage_conf_new_proxy/test.py @@ -0,0 +1,66 @@ +import logging +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/storage_conf.xml", + "configs/config.d/proxy_list.xml", + ], + with_minio=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET"}): + for i in range(10): + logs = cluster.get_container_logs(proxy_instance) + # Check with retry that all possible interactions with Minio are present + for http_method in http_methods: + if logs.find(http_method + " http://minio1") >= 0: + return + time.sleep(1) + else: + assert False, f"{http_methods} method not found in logs of {proxy_instance}" + + +@pytest.mark.parametrize("policy", ["s3"]) +def test_s3_with_proxy_list(cluster, policy): + node = cluster.instances["node"] + + node.query( + """ + CREATE TABLE s3_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format( + policy + ) + ) + node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") + assert ( + node.query("SELECT * FROM s3_test order by id FORMAT Values") + == "(0,'data'),(1,'data')" + ) + + node.query("DROP TABLE IF EXISTS s3_test SYNC") + + for proxy in ["proxy1", "proxy2"]: + check_proxy_logs(cluster, proxy, ["PUT", "GET"]) diff --git a/tests/integration/test_s3_storage_conf_proxy/__init__.py b/tests/integration/test_s3_storage_conf_proxy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_s3_with_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml similarity index 100% rename from tests/integration/test_s3_with_proxy/configs/config.d/storage_conf.xml rename to tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml diff --git a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/users.xml 
b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/users.xml new file mode 100644 index 00000000000..0011583a68c --- /dev/null +++ b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/users.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_s3_storage_conf_proxy/configs/config.xml b/tests/integration/test_s3_storage_conf_proxy/configs/config.xml new file mode 100644 index 00000000000..f4be5ab6b7c --- /dev/null +++ b/tests/integration/test_s3_storage_conf_proxy/configs/config.xml @@ -0,0 +1,7 @@ + + 9000 + 127.0.0.1 + 500 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py b/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py new file mode 100644 index 00000000000..eaea4c1dab2 --- /dev/null +++ b/tests/integration/test_s3_storage_conf_proxy/proxy-resolver/resolver.py @@ -0,0 +1,14 @@ +import random + +import bottle + + +@bottle.route("/hostname") +def index(): + if random.randrange(2) == 0: + return "proxy1" + else: + return "proxy2" + + +bottle.run(host="0.0.0.0", port=8080) diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_storage_conf_proxy/test.py similarity index 76% rename from tests/integration/test_s3_with_proxy/test.py rename to tests/integration/test_s3_storage_conf_proxy/test.py index e5624d4e056..6cf612f8259 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_storage_conf_proxy/test.py @@ -4,18 +4,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster - - -# Runs simple proxy resolver in python env container. -def run_resolver(cluster): - container_id = cluster.get_container_id("resolver") - current_dir = os.path.dirname(__file__) - cluster.copy_file_to_container( - container_id, - os.path.join(current_dir, "proxy-resolver", "resolver.py"), - "resolver.py", - ) - cluster.exec_in_container(container_id, ["python", "resolver.py"], detach=True) +import helpers.s3_url_proxy_tests_util as proxy_util @pytest.fixture(scope="module") @@ -29,7 +18,7 @@ def cluster(): cluster.start() logging.info("Cluster started") - run_resolver(cluster) + proxy_util.run_resolver(cluster, os.path.dirname(__file__)) logging.info("Proxy resolver started") yield cluster @@ -46,7 +35,7 @@ def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET" return time.sleep(1) else: - assert False, "http method not found in logs" + assert False, f"{http_methods} method not found in logs of {proxy_instance}" @pytest.mark.parametrize("policy", ["s3", "s3_with_resolver"]) @@ -65,7 +54,6 @@ def test_s3_with_proxy_list(cluster, policy): policy ) ) - node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") assert ( node.query("SELECT * FROM s3_test order by id FORMAT Values") diff --git a/tests/integration/test_s3_table_function_with_http_proxy/__init__.py b/tests/integration/test_s3_table_function_with_http_proxy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml new file mode 100644 index 00000000000..af5687d88ac --- /dev/null +++ b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list.xml @@ -0,0 +1,8 @@ + + + + http://proxy1 + http://proxy2 + + + \ No newline at end of file diff --git 
a/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_remote.xml b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_remote.xml new file mode 100644 index 00000000000..30d99605458 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_remote.xml @@ -0,0 +1,15 @@ + + + + + http://resolver:8080/hostname + http + 80 + 10 + + + diff --git a/tests/integration/test_s3_table_function_with_http_proxy/proxy-resolver/resolver.py b/tests/integration/test_s3_table_function_with_http_proxy/proxy-resolver/resolver.py new file mode 100644 index 00000000000..8c7611303b8 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_http_proxy/proxy-resolver/resolver.py @@ -0,0 +1,11 @@ +import random + +import bottle + + +@bottle.route("/hostname") +def index(): + return "proxy1" + + +bottle.run(host="0.0.0.0", port=8080) diff --git a/tests/integration/test_s3_table_function_with_http_proxy/test.py b/tests/integration/test_s3_table_function_with_http_proxy/test.py new file mode 100644 index 00000000000..8751b8f3e99 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_http_proxy/test.py @@ -0,0 +1,59 @@ +import logging +import helpers.s3_url_proxy_tests_util as proxy_util +import os + +import pytest +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + + cluster.add_instance( + "remote_proxy_node", + main_configs=[ + "configs/config.d/proxy_remote.xml", + ], + with_minio=True, + ) + + cluster.add_instance( + "proxy_list_node", + main_configs=[ + "configs/config.d/proxy_list.xml", + ], + with_minio=True, + ) + + cluster.add_instance( + "env_node", + with_minio=True, + env_variables={ + "http_proxy": "http://proxy1", + }, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + proxy_util.run_resolver(cluster, os.path.dirname(__file__)) + logging.info("Proxy resolver started") + + yield cluster + finally: + cluster.shutdown() + + +def test_s3_with_http_proxy_list(cluster): + proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "http", "proxy_list_node") + + +def test_s3_with_http_remote_proxy(cluster): + proxy_util.simple_test(cluster, ["proxy1"], "http", "remote_proxy_node") + + +def test_s3_with_http_env_proxy(cluster): + proxy_util.simple_test(cluster, ["proxy1"], "http", "env_node") diff --git a/tests/integration/test_s3_table_function_with_https_proxy/__init__.py b/tests/integration/test_s3_table_function_with_https_proxy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml new file mode 100644 index 00000000000..9f2724d78b4 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list.xml @@ -0,0 +1,12 @@ + + + + http://proxy1 + http://proxy2 + + + https://proxy1 + https://proxy2 + + + \ No newline at end of file diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_remote.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_remote.xml new file mode 100644 index 00000000000..c0f5975224d --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_remote.xml @@ -0,0 +1,22 @@ + + + + + 
http://resolver:8080/hostname + http + 80 + 10 + + + + http://resolver:8080/hostname + https + 443 + 10 + + + diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/ssl.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/ssl.xml new file mode 100644 index 00000000000..d4cea955c68 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/ssl.xml @@ -0,0 +1,11 @@ + + + + true + none + + AcceptCertificateHandler + + + + diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/users.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/users.xml new file mode 100644 index 00000000000..0011583a68c --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/users.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_s3_with_proxy/configs/config.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.xml similarity index 100% rename from tests/integration/test_s3_with_proxy/configs/config.xml rename to tests/integration/test_s3_table_function_with_https_proxy/configs/config.xml diff --git a/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/CAs/public.crt b/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/CAs/public.crt new file mode 100644 index 00000000000..7f87261aea8 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/CAs/public.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+TCCAeGgAwIBAgIQfF4j70ZdR/W3XlFJq5iZgDANBgkqhkiG9w0BAQsFADAS +MRAwDgYDVQQKEwdBY21lIENvMB4XDTIwMDcwOTE1MTQ1M1oXDTIxMDcwOTE1MTQ1 +M1owEjEQMA4GA1UEChMHQWNtZSBDbzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC +AQoCggEBAM4i2tOlbbDxcvckVK/Zms95n2ipr7dZ0qToSf8qmF5d2EH6mqC0Vv2d +MJ+8JhQEKBh8AvUjmSqjd8tWmLJcqA84Gc8s8stB565wwkaMBvMExKlO+PQtynRd +xZjQVnj16hB0ZP4JHeVUOqMQa7uPQZQp6kqdkJ3u84EhRmU8fCCtUPOJIYHcfx7P +ScYfmJCpmqxrfWP18XcyYlhoCTm/nV+XT+XfUGwc6Sok5pCX5C70PiQ5MrEvYDIC +Q3iRNi2Lj4pTG8GUSwAcKLB08o7mxHvR1MGDGohtGnSAhdniK9aljNmBQfNIErFI +3529YDMW/qwRKSEkJpMy7r8RkfYamUsCAwEAAaNLMEkwDgYDVR0PAQH/BAQDAgKk +MBMGA1UdJQQMMAoGCCsGAQUFBwMBMA8GA1UdEwEB/wQFMAMBAf8wEQYDVR0RBAow +CIIGbWluaW8xMA0GCSqGSIb3DQEBCwUAA4IBAQDAlDKuJfQHzsBtFioNnf9VS+LA +m/aUG7ngl0Orynef45Kl21l1ToM0feKk1UnEN8crwE0wjZttby84sYaq0us7mnbl +CnFsvphNqk5jxMbSs/8Vx4TVEimyO7r5JeG4J9lEnAu2hKK5ZlwPzj7G8bL4fOvk +OGiiP5r0K3wTVU/Y96MmDUaJwBNiyp7WtsBRzkteSPQJDC98gUCYeYsIFokUs3gz +ILOAbGQBLKUn9kmYc+/LLNha0nsC0eQGmLaJgIYfele63c6KkklQ3ePjRZ71JfmN +TulovRrwUf0J4hYcIgC1URZbClsnQDOBFCY6Lm8eI+IGNWWU4I9WGoJ1Lkvk +-----END CERTIFICATE----- diff --git a/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/private.key b/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/private.key new file mode 100644 index 00000000000..28a0f4bfde7 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/private.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC9ORgaBCx42ejp +9PSjc0uvwH/hTB6yZvZB4S+wxbzzfeKomX/JBcFHmGCIJJVjVV0rafv3vw+9f9u4 +wrZpN4HZKnVyz3mBXEA1WDvLTLV8n8zVyso1qbnfF9Fa8wnk89b0xGWyM7jie7/c +TIGMrgm7hIPaM2zDzFwIfIAqZ1AexC4vADIffF9rcFLLjNHuv1uAc32jdfQEPluv +mBMzGkz254+MabxZWIZjkYn70kNSZDoyFmMGafBtkRTUPNq2+fGv/eLJ9Lxm3153 +Ja0sCyzLlEo9+/z4ERqM5zwWre4vcwfO63c5pcSCzGw84teTpmDwSyiSR70TYJdt +BGQqZvLZAgMBAAECggEANe8oJ4I5CtlRwh3H/S7Hy/iaeqUvuroORwjghwpVqTGg 
+gV3/RlUVmkqceTG0QvP58n3rC9qxqdnfzvHw/FyN7lBj2a25fF3HD21u3aunrzX9 +NJLwwAr4p9YqHjpX/6JhCrNQKVMEx8luDmTgKDETJRfIXVF7FvQQ53pVLcD03U+g +MgN61HBzfT5L0TLHoiKNQbVi+Wm1gw3zvb/a9Z1rULRZfIuKGM0bNNqRZt4rUUAV +QicklDR0Qv59jhr5Y/zjinKkqF8qudvUkaNT2JH1DLfXiAhuC0OQugMjYzNntQB4 +hMhkqARnjuk/WPMvnXivnqx9o69BL5wyXIj3vD4fgQKBgQDVKaXAZJ5bo3VfcpLm +cyjtUuOzAxLU1bVGI0Hm1ARqeGVxSTypZLSX8xFi2n5Bvbgh/Y60aEac/1uKoXA9 +gej1MT4hKpXyagrARx97E8zk5nf88kVxkiKUrifMjP2lDzHIYhdKk9R3SiV6gWvA +FoJtjBwFhJ6uWUPyry4nqFSENQKBgQDjP9k6CTZF0EnDqbADiQr7VKpebqhtLWRD +U0bQh/l57VrWqGksVOlivIJChP49q1H+hQ1YgfKIEDag8JJnf/inUSpVsw1ljAjv +knqNzn0Gdd9lTsiNGgqlCjhmWedkh4eO8uau479TwQc6gB4PQdLAFynQtt8Kk45P +GxdpRx4AlQKBgQCgxUGbYwhBC37aF1sObqrenBbajCXm2qxXEv6Ab0ZJWzb/g4I6 +LJc8x3pEeZCiWsoG8Otxy/f+L2bGn049Rb8DNzmp4Cmp5SrorHvk4yE1P1IeOEgC +CXsFcnjYATrJBDXC8aCpgefMdOLhi71N6mxC3VrBGq5nxzHFVzTTelUMRQKBgQDa +yekhiCb5liy+tcuhy7qH+Z7BpjaATrh+XVoLgS5+5jeT/basmN/OUQH0e0iwJRaf +Poh30zynJT0DPDsobLwAkxN4SRg30Vf1GAjoKIqUwr2fMvfBafYfqbRdTmeKkTXB +OjlA3kKhp3GHMDxAojX+/Q4kRTx+WUwk+0dR88d99QKBgEiYrkSLjKXUFllDmVyp +HtlYKZiq5c33DA06SA2uVOprCdTbnbvP4WrgUsLGvqBcaPEd06fGGbvJWwUdnkXM +HNAkqSeUe5ueovidtoPdF+aPyxdGg3Z8551xOoHZFYrvgdZ4YMPcJrwQQsvWCcYP +GDnSoD8Xjd2LmekTpDBt5ZVz +-----END PRIVATE KEY----- diff --git a/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/public.crt b/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/public.crt new file mode 100644 index 00000000000..0d0992c8f5b --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/minio_certs/public.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDBTCCAe2gAwIBAgIRANb2pr4HgR8YFwKNJMUSWiIwDQYJKoZIhvcNAQELBQAw +EjEQMA4GA1UEChMHQWNtZSBDbzAeFw0yMDA3MDkxODE1MDBaFw0yMTA3MDkxODE1 +MDBaMBIxEDAOBgNVBAoTB0FjbWUgQ28wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw +ggEKAoIBAQC9ORgaBCx42ejp9PSjc0uvwH/hTB6yZvZB4S+wxbzzfeKomX/JBcFH +mGCIJJVjVV0rafv3vw+9f9u4wrZpN4HZKnVyz3mBXEA1WDvLTLV8n8zVyso1qbnf +F9Fa8wnk89b0xGWyM7jie7/cTIGMrgm7hIPaM2zDzFwIfIAqZ1AexC4vADIffF9r +cFLLjNHuv1uAc32jdfQEPluvmBMzGkz254+MabxZWIZjkYn70kNSZDoyFmMGafBt +kRTUPNq2+fGv/eLJ9Lxm3153Ja0sCyzLlEo9+/z4ERqM5zwWre4vcwfO63c5pcSC +zGw84teTpmDwSyiSR70TYJdtBGQqZvLZAgMBAAGjVjBUMA4GA1UdDwEB/wQEAwIC +pDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/MBwGA1UdEQQV +MBOCBm1pbmlvMYIJbG9jYWxob3N0MA0GCSqGSIb3DQEBCwUAA4IBAQAKU2LhvFFz +RFfUibt/WTj3rtUfKEBrQuUOYt2A8MTbC8pyEu+UJASTzunluUFze5zchEm1s3pZ +YRLcNwbJqLE6CzUxQ9b2iUhaeWuKrx4ZoPkY0uGiaXM/iKfVKTuNmhF2Sf/P4xUE +Pt19yQjpIhcicWQc37BBQFvnvy+n5wgHa/pgl1+QUvAa/fwYhF9S28xRLESzZepm +NMYysopV+YMaxcFa9SH44toXtXnvRWwVdEorlq1W3/AiJg8hDPzSa9UXLMjA968J +ONtn3qvwac9Ot53+QsXJdsMmDZLWGCi6I1w0ZQetpr/0ubaA1F3GdK9eB/S0thqU +l2VUgn3c/kKS +-----END CERTIFICATE----- diff --git a/tests/integration/test_s3_table_function_with_https_proxy/proxy-resolver/resolver.py b/tests/integration/test_s3_table_function_with_https_proxy/proxy-resolver/resolver.py new file mode 100644 index 00000000000..8c7611303b8 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/proxy-resolver/resolver.py @@ -0,0 +1,11 @@ +import random + +import bottle + + +@bottle.route("/hostname") +def index(): + return "proxy1" + + +bottle.run(host="0.0.0.0", port=8080) diff --git a/tests/integration/test_s3_table_function_with_https_proxy/test.py b/tests/integration/test_s3_table_function_with_https_proxy/test.py new file mode 100644 index 00000000000..a498410a4d4 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/test.py @@ -0,0 +1,67 @@ +import logging +import 
helpers.s3_url_proxy_tests_util as proxy_util +import os + +import pytest +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + + # minio_certs_dir is set only once and used by all instances + + cluster.add_instance( + "remote_proxy_node", + main_configs=[ + "configs/config.d/proxy_remote.xml", + "configs/config.d/ssl.xml", + ], + with_minio=True, + minio_certs_dir="minio_certs", + ) + + cluster.add_instance( + "proxy_list_node", + main_configs=[ + "configs/config.d/proxy_list.xml", + "configs/config.d/ssl.xml", + ], + with_minio=True, + ) + + cluster.add_instance( + "env_node", + main_configs=[ + "configs/config.d/ssl.xml", + ], + with_minio=True, + env_variables={ + "https_proxy": "https://proxy1", + }, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + proxy_util.run_resolver(cluster, os.path.dirname(__file__)) + logging.info("Proxy resolver started") + + yield cluster + finally: + cluster.shutdown() + + +def test_s3_with_https_proxy_list(cluster): + proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "https", "proxy_list_node") + + +def test_s3_with_https_remote_proxy(cluster): + proxy_util.simple_test(cluster, ["proxy1"], "https", "remote_proxy_node") + + +def test_s3_with_https_env_proxy(cluster): + proxy_util.simple_test(cluster, ["proxy1"], "https", "env_node") diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 2a4e0eece08..0ca81a27802 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -153,96 +153,6 @@ def test_s3_zero_copy_replication(started_cluster, policy): node2.query("DROP TABLE IF EXISTS s3_test SYNC") -@pytest.mark.skip(reason="Test is flaky (and never was stable)") -def test_s3_zero_copy_on_hybrid_storage(started_cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - - node1.query( - """ - CREATE TABLE hybrid_test ON CLUSTER test_cluster (id UInt32, value String) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/hybrid_test', '{}') - ORDER BY id - SETTINGS storage_policy='hybrid',temporary_directories_lifetime=1 - """.format( - "{replica}" - ) - ) - - node1.query("INSERT INTO hybrid_test VALUES (0,'data'),(1,'data')") - node2.query("SYSTEM SYNC REPLICA hybrid_test", timeout=30) - - assert ( - node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - assert ( - node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - - node1.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'") - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - - # Total objects in S3 - s3_objects = get_large_objects_count(cluster, size=0) - - node2.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'") - - 
assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - - # Check that after moving partition on node2 no new obects on s3 - wait_for_large_objects_count(cluster, s3_objects, size=0) - - assert ( - node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - assert ( - node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - node1.query("DROP TABLE IF EXISTS hybrid_test SYNC") - node2.query("DROP TABLE IF EXISTS hybrid_test SYNC") - - def insert_data_time(node, table, number_of_mb, time, start=0): values = ",".join( f"({x},{time})" diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py deleted file mode 100644 index 5fbe426074f..00000000000 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -import time - -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/45887 - -pytestmark = pytest.mark.skip - -from helpers.cluster import ClickHouseCluster - - -single_node_cluster = ClickHouseCluster(__file__) -small_node = single_node_cluster.add_instance( - "small_node", main_configs=["configs/s3.xml"], with_minio=True -) - - -@pytest.fixture(scope="module") -def started_single_node_cluster(): - try: - single_node_cluster.start() - - yield single_node_cluster - finally: - single_node_cluster.shutdown() - - -def test_move_and_s3_memory_usage(started_single_node_cluster): - if small_node.is_built_with_sanitizer() or small_node.is_debug_build(): - pytest.skip("Disabled for debug and sanitizers. Too slow.") - - small_node.query( - "CREATE TABLE s3_test_with_ttl (x UInt32, a String codec(NONE), b String codec(NONE), c String codec(NONE), d String codec(NONE), e String codec(NONE)) engine = MergeTree order by x partition by x SETTINGS storage_policy='s3_and_default'" - ) - - for _ in range(10): - small_node.query( - "insert into s3_test_with_ttl select 0, repeat('a', 100), repeat('b', 100), repeat('c', 100), repeat('d', 100), repeat('e', 100) from zeros(400000) settings max_block_size = 8192, max_insert_block_size=10000000, min_insert_block_size_rows=10000000" - ) - - # After this, we should have 5 columns per 10 * 100 * 400000 ~ 400 MB; total ~2G data in partition - small_node.query( - "optimize table s3_test_with_ttl final", - settings={ - "send_logs_level": "error", - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - - small_node.query("system flush logs") - # Will take memory usage from metric_log. - # It is easier then specifying total memory limit (insert queries can hit this limit). 
- small_node.query("truncate table system.metric_log") - - small_node.query( - "alter table s3_test_with_ttl move partition 0 to volume 'external'", - settings={ - "send_logs_level": "error", - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - small_node.query("system flush logs") - max_usage = small_node.query( - """ - select max(m.val - am.val * 4096) from - (select toStartOfMinute(event_time) as time, max(CurrentMetric_MemoryTracking) as val from system.metric_log group by time) as m join - (select toStartOfMinute(event_time) as time, min(value) as val from system.asynchronous_metric_log where metric='jemalloc.arenas.all.pdirty' group by time) as am using time;""" - ) - # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. - # Also actual value can be bigger because of memory drift. - # Increase it a little bit if test fails. - assert int(max_usage) < 3e9 - res = small_node.query( - "select * from system.errors where last_error_message like '%Memory limit%' limit 1", - settings={ - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - assert res == "" diff --git a/tests/integration/test_scheduler/__init__.py b/tests/integration/test_scheduler/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_scheduler/configs/scheduler.xml b/tests/integration/test_scheduler/configs/scheduler.xml new file mode 100644 index 00000000000..523ba1a5a98 --- /dev/null +++ b/tests/integration/test_scheduler/configs/scheduler.xml @@ -0,0 +1,62 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + 10 + 10 + network_read + network_write + + + + + +
+ s3 +
+
+
+
+
+ + + inflight_limit1000000 + priority + fifo0 + fair1 + fifo9 + fifo1 + + + inflight_limit1000000 + priority + fifo0 + fair1 + fifo9 + fifo1 + + + + + /prio/admin + /prio/admin + + + /prio/fair/prod + /prio/fair/prod + + + /prio/fair/dev + /prio/fair/dev + + + /prio/fair/dev + /prio/fair/dev + + +
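The scheduler config above attaches the `network_read` and `network_write` resources to the `s3` disk and appears to route the `admin`, `production`, and `development` workloads used by the test onto the `/prio/admin`, `/prio/fair/prod`, and `/prio/fair/dev` scheduler paths. A minimal illustrative sketch (not part of the patch) of how a query opts into one of those workloads and how the effect can be observed, assuming the same `node` cluster instance and `data` table created in the test that follows:

```python
# Illustrative only: how a query selects a workload configured above and how
# the result shows up in system.scheduler. `node` and the `data` table are
# assumed to be the ones created in test_s3_disk below.

def insert_under_workload(node, workload):
    # The `workload` setting picks a classifier, so the S3 write requests of
    # this query are queued under the matching /prio/... scheduler path.
    node.query(
        "INSERT INTO data SELECT * FROM numbers(100000)",
        settings={"workload": workload},
    )


def dequeued_requests(node, resource, path):
    # Each scheduler node exposes per-resource counters; a non-zero value
    # means the path actually served requests for that workload.
    return int(
        node.query(
            f"SELECT dequeued_requests FROM system.scheduler "
            f"WHERE resource = '{resource}' AND path = '{path}'"
        )
    )


# e.g. insert_under_workload(node, "production") followed by
# dequeued_requests(node, "network_write", "/prio/fair/prod") > 0
```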
diff --git a/tests/integration/test_scheduler/test.py b/tests/integration/test_scheduler/test.py new file mode 100644 index 00000000000..e6def99c076 --- /dev/null +++ b/tests/integration/test_scheduler/test.py @@ -0,0 +1,112 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import time +import threading +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + stay_alive=True, + main_configs=["configs/scheduler.xml"], + with_minio=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield + finally: + cluster.shutdown() + + +def test_s3_disk(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + def write_query(workload): + try: + node.query( + f"insert into data select * from numbers(1e5) settings workload='{workload}'" + ) + except QueryRuntimeException: + pass + + thread1 = threading.Thread(target=write_query, args=["development"]) + thread2 = threading.Thread(target=write_query, args=["production"]) + thread3 = threading.Thread(target=write_query, args=["admin"]) + + thread1.start() + thread2.start() + thread3.start() + + thread3.join() + thread2.join() + thread1.join() + + assert ( + node.query( + f"select dequeued_requests>0 from system.scheduler where resource='network_write' and path='/prio/admin'" + ) + == "1\n" + ) + assert ( + node.query( + f"select dequeued_requests>0 from system.scheduler where resource='network_write' and path='/prio/fair/dev'" + ) + == "1\n" + ) + assert ( + node.query( + f"select dequeued_requests>0 from system.scheduler where resource='network_write' and path='/prio/fair/prod'" + ) + == "1\n" + ) + + def read_query(workload): + try: + node.query(f"select sum(key*key) from data settings workload='{workload}'") + except QueryRuntimeException: + pass + + thread1 = threading.Thread(target=read_query, args=["development"]) + thread2 = threading.Thread(target=read_query, args=["production"]) + thread3 = threading.Thread(target=read_query, args=["admin"]) + + thread1.start() + thread2.start() + thread3.start() + + thread3.join() + thread2.join() + thread1.join() + + assert ( + node.query( + f"select dequeued_requests>0 from system.scheduler where resource='network_read' and path='/prio/admin'" + ) + == "1\n" + ) + assert ( + node.query( + f"select dequeued_requests>0 from system.scheduler where resource='network_read' and path='/prio/fair/dev'" + ) + == "1\n" + ) + assert ( + node.query( + f"select dequeued_requests>0 from system.scheduler where resource='network_read' and path='/prio/fair/prod'" + ) + == "1\n" + ) diff --git a/tests/integration/test_select_access_rights/test_from_system_tables.py b/tests/integration/test_select_access_rights/test_from_system_tables.py index ac938a9694a..3cd6b90109c 100644 --- a/tests/integration/test_select_access_rights/test_from_system_tables.py +++ b/tests/integration/test_select_access_rights/test_from_system_tables.py @@ -44,14 +44,14 @@ def test_system_db(): assert node.query("SELECT count()>0 FROM system.settings", user="another") == "1\n" expected_error = ( - "necessary to have grant SELECT for at least one column on system.users" + "necessary to have the grant SELECT for at least one column on system.users" ) assert expected_error in node.query_and_get_error( 
"SELECT count()>0 FROM system.users", user="another" ) expected_error = ( - "necessary to have grant SELECT for at least one column on system.clusters" + "necessary to have the grant SELECT for at least one column on system.clusters" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.clusters", user="another" @@ -72,14 +72,14 @@ def test_system_db(): assert node.query("SELECT count()>0 FROM system.settings", user="sqluser") == "1\n" expected_error = ( - "necessary to have grant SELECT for at least one column on system.users" + "necessary to have the grant SELECT for at least one column on system.users" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="sqluser" ) expected_error = ( - "necessary to have grant SELECT for at least one column on system.clusters" + "necessary to have the grant SELECT for at least one column on system.clusters" ) assert node.query_and_get_error( "SELECT count()>0 FROM system.clusters", user="sqluser" @@ -138,7 +138,7 @@ def test_information_schema(): ) expected_error = ( - "necessary to have grant SELECT(table_name) ON information_schema.tables" + "necessary to have the grant SELECT(table_name) ON information_schema.tables" ) assert expected_error in node.query_and_get_error( "SELECT count() FROM information_schema.tables WHERE table_name='table1'", diff --git a/tests/integration/test_select_access_rights/test_main.py b/tests/integration/test_select_access_rights/test_main.py index 76940cdadb4..eedecc2d30c 100644 --- a/tests/integration/test_select_access_rights/test_main.py +++ b/tests/integration/test_select_access_rights/test_main.py @@ -30,7 +30,7 @@ def test_select_single_column(): select_query = "SELECT a FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -39,7 +39,7 @@ def test_select_single_column(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -51,7 +51,7 @@ def test_select_single_column_with_table_grant(): select_query = "SELECT a FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -60,7 +60,7 @@ def test_select_single_column_with_table_grant(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -72,19 +72,19 @@ def test_select_all_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(d) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(a) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON 
default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -99,7 +99,7 @@ def test_select_all_columns_with_table_grant(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -114,7 +114,7 @@ def test_alias(): select_query = "SELECT x, y, x + y AS s FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -129,7 +129,7 @@ def test_alias_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -138,7 +138,7 @@ def test_alias_columns(): select_query = "SELECT s FROM table1" assert ( - "it's necessary to have grant SELECT(s) ON default.table1" + "it's necessary to have the grant SELECT(s) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -156,7 +156,7 @@ def test_materialized_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -165,7 +165,7 @@ def test_materialized_columns(): select_query = "SELECT p FROM table1" assert ( - "it's necessary to have grant SELECT(p) ON default.table1" + "it's necessary to have the grant SELECT(p) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -186,13 +186,13 @@ def test_select_join(): select_query = "SELECT * FROM table1 JOIN table2 USING(d)" assert ( - "it's necessary to have grant SELECT(d, x, y) ON default.table2" + "it's necessary to have the grant SELECT(d, x, y) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(d, x, y) ON default.table2 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -201,7 +201,7 @@ def test_select_join(): instance.query("REVOKE SELECT ON default.table2 FROM A") assert ( - "it's necessary to have grant SELECT(d, x, y) ON default.table2" + "it's necessary to have the grant SELECT(d, x, y) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) @@ -216,13 +216,13 @@ def test_select_union(): select_query = "SELECT * FROM table1 UNION ALL SELECT * FROM table2" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(a, b) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table2" + "it's necessary to have the grant SELECT(a, b) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) @@ -231,7 +231,7 @@ def test_select_union(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant 
SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -243,7 +243,7 @@ def test_select_count(): select_query = "SELECT count() FROM table1" assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -252,7 +252,7 @@ def test_select_count(): instance.query("REVOKE SELECT(x) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -261,7 +261,7 @@ def test_select_count(): instance.query("REVOKE SELECT(y) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -279,7 +279,7 @@ def test_select_where(): select_query = "SELECT a FROM table1 WHERE b = 0" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -288,7 +288,7 @@ def test_select_where(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -306,7 +306,7 @@ def test_select_prewhere(): select_query = "SELECT a FROM table1 PREWHERE b = 0" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -315,7 +315,7 @@ def test_select_prewhere(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -334,11 +334,11 @@ def test_select_with_row_policy(): select_query = "SELECT a FROM table1" select_query2 = "SELECT count() FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user="A") ) @@ -348,10 +348,10 @@ def test_select_with_row_policy(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user="A") ) diff --git a/tests/integration/test_session_log/.gitignore b/tests/integration/test_session_log/.gitignore new file mode 100644 index 
00000000000..edf565ec632 --- /dev/null +++ b/tests/integration/test_session_log/.gitignore @@ -0,0 +1 @@ +_gen diff --git a/tests/integration/test_session_log/__init__.py b/tests/integration/test_session_log/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_session_log/configs/log.xml b/tests/integration/test_session_log/configs/log.xml new file mode 100644 index 00000000000..7a079b81e69 --- /dev/null +++ b/tests/integration/test_session_log/configs/log.xml @@ -0,0 +1,9 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + \ No newline at end of file diff --git a/tests/integration/test_session_log/configs/ports.xml b/tests/integration/test_session_log/configs/ports.xml new file mode 100644 index 00000000000..fbaefc16b3a --- /dev/null +++ b/tests/integration/test_session_log/configs/ports.xml @@ -0,0 +1,9 @@ + + 5433 + 9001 + 9100 + + + false + + \ No newline at end of file diff --git a/tests/integration/test_session_log/configs/session_log.xml b/tests/integration/test_session_log/configs/session_log.xml new file mode 100644 index 00000000000..a0e4e3e2216 --- /dev/null +++ b/tests/integration/test_session_log/configs/session_log.xml @@ -0,0 +1,9 @@ + + + system + session_log
+ + toYYYYMM(event_date) + 7500 +
+
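The session_log config above enables the `system.session_log` table that the test below reads. The core check repeated for every protocol is pairing each `LoginSuccess` row with a `Logout` row by intersecting on a fixed set of fields; a minimal sketch of that query (illustrative, not part of the patch), assuming the same `instance` object as in the test:

```python
# Illustrative only: the login/logout pairing used throughout the test below.
# A completed session is a LoginSuccess row whose matching fields also appear
# in a Logout row for the same user.

MATCHING_FIELDS = (
    "auth_id, auth_type, client_version_major, client_version_minor, "
    "client_version_patch, interface"
)


def completed_sessions(instance, user):
    return int(
        instance.query(
            f"SELECT COUNT(*) FROM ("
            f"SELECT {MATCHING_FIELDS} FROM system.session_log "
            f"WHERE user = '{user}' AND type = 'LoginSuccess' "
            f"INTERSECT "
            f"SELECT {MATCHING_FIELDS} FROM system.session_log "
            f"WHERE user = '{user}' AND type = 'Logout')"
        )
    )


# e.g. completed_sessions(instance, "grpc_user") == 1 after test_grpc_session
```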
diff --git a/tests/integration/test_session_log/configs/users.xml b/tests/integration/test_session_log/configs/users.xml new file mode 100644 index 00000000000..0416dfadc8a --- /dev/null +++ b/tests/integration/test_session_log/configs/users.xml @@ -0,0 +1,23 @@ + + + + 0 + + + + + + + pass + + + pass + + + pass + + + pass + + + \ No newline at end of file diff --git a/tests/integration/test_session_log/protos/clickhouse_grpc.proto b/tests/integration/test_session_log/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_session_log/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py new file mode 100644 index 00000000000..0c350e6c2c9 --- /dev/null +++ b/tests/integration/test_session_log/test.py @@ -0,0 +1,295 @@ +import os +import grpc +import pymysql.connections +import pytest +import random +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check + +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 +SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/session_log.xml", + ], + user_configs=["configs/users.xml"], + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + env_variables={ + "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") + }, + with_postgres=True, +) + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +session_id_counter = 0 + + +def next_session_id(): + global session_id_counter + session_id = session_id_counter + session_id_counter += 1 + return str(session_id) + + +def grpc_query(query, user_, pass_, raise_exception): + try: + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query, + session_id=next_session_id(), + user_name=user_, + password=pass_, + ) + channel = grpc_create_insecure_channel() + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + + return result.output.decode(DEFAULT_ENCODING) + except Exception: + assert raise_exception + + +def postgres_query(query, user_, pass_, raise_exception): + try: + connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}" + cluster.exec_in_container( + cluster.postgres_id, + [ + "/usr/bin/psql", + 
connection_string, + "--no-align", + "--field-separator=' '", + "-c", + query, + ], + shell=True, + ) + except Exception: + assert raise_exception + + +def mysql_query(query, user_, pass_, raise_exception): + try: + client = pymysql.connections.Connection( + host=instance.ip_address, + user=user_, + password=pass_, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + if raise_exception: + with pytest.raises(Exception): + cursor.execute(query) + else: + cursor.execute(query) + cursor.fetchall() + except Exception: + assert raise_exception + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + # Wait for the PostgreSQL handler to start. + # Cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. + instance.wait_for_log_line("PostgreSQL compatibility protocol") + yield cluster + finally: + cluster.shutdown() + + +def test_grpc_session(started_cluster): + grpc_query("SELECT 1", "grpc_user", "pass", False) + grpc_query("SELECT 2", "grpc_user", "wrong_pass", True) + grpc_query("SELECT 3", "wrong_grpc_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "grpc_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'" + ) + assert logout_records == "grpc_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "grpc_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_mysql_session(started_cluster): + mysql_query("SELECT 1", "mysql_user", "pass", False) + mysql_query("SELECT 2", "mysql_user", "wrong_pass", True) + mysql_query("SELECT 3", "wrong_mysql_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "mysql_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'" + ) + assert logout_records == "mysql_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "mysql_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')" + ) + assert logins_and_logouts == 
"1\n" + + +def test_postgres_session(started_cluster): + postgres_query("SELECT 1", "postgres_user", "pass", False) + postgres_query("SELECT 2", "postgres_user", "wrong_pass", True) + postgres_query("SELECT 3", "wrong_postgres_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "postgres_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'" + ) + assert logout_records == "postgres_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "postgres_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_parallel_sessions(started_cluster): + thread_list = [] + for _ in range(10): + # Sleep time does not significantly matter here, + # test should pass even without sleeping. + for function in [postgres_query, grpc_query, mysql_query]: + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "pass", + False, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "wrong_pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "wrong_parallel_user", + "pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + + for thread in thread_list: + thread.join() + + instance.query("SYSTEM FLUSH LOGS") + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'" + ) + assert port_0_sessions == "90\n" + + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0" + ) + assert port_0_sessions == "0\n" + + address_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')" + ) + assert address_0_sessions == "0\n" + + grpc_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'" + ) + assert grpc_sessions == "30\n" + + mysql_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'" + ) + assert mysql_sessions == "30\n" + + postgres_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'" + ) + assert postgres_sessions == "30\n" + + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT 
{SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "30\n" + + logout_failure_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginFailure'" + ) + assert logout_failure_sessions == "30\n" diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 13acc3f1042..5e40b534cee 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -88,7 +88,7 @@ def test_smoke(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "['robin']", "[]"] + ["xyz", "local_directory", 1, 0, "['robin']", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -120,7 +120,7 @@ def test_smoke(): instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "[]", "[]"] + ["xyz", "local_directory", 1, 0, "[]", "[]"] ] assert system_settings_profile_elements(user_name="robin") == [] @@ -201,7 +201,7 @@ def test_settings_from_granted_role(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "[]", "[]"] + ["xyz", "local_directory", 2, 0, "[]", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -276,7 +276,7 @@ def test_settings_from_granted_role(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "['worker']", "[]"] + ["xyz", "local_directory", 2, 0, "['worker']", "[]"] ] instance.query("ALTER SETTINGS PROFILE xyz TO NONE") @@ -293,7 +293,7 @@ def test_settings_from_granted_role(): ) instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "[]", "[]"] + ["xyz", "local_directory", 2, 0, "[]", "[]"] ] @@ -323,7 +323,7 @@ def test_inheritance(): ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "[]", "[]"] + ["xyz", "local_directory", 1, 0, "[]", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -340,7 +340,7 @@ def test_inheritance(): ] ] assert system_settings_profile("alpha") == [ - ["alpha", "local directory", 1, 0, "['robin']", "[]"] + ["alpha", "local_directory", 1, 0, "['robin']", "[]"] ] assert system_settings_profile_elements(profile_name="alpha") == [ ["alpha", "\\N", "\\N", 0, "\\N", "\\N", "\\N", "\\N", "\\N", "xyz"] @@ -589,10 +589,10 @@ def test_function_current_profiles(): def test_allow_ddl(): - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "GRANT CREATE ON tbl TO robin", user="robin" ) assert "DDL queries are prohibited" in instance.query_and_get_error( @@ -615,10 +615,10 @@ def test_allow_introspection(): assert "Introspection functions are disabled" in instance.query_and_get_error( "SELECT demangle('a')" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin" ) - assert "it's necessary 
to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin", settings={"allow_introspection_functions": 1}, @@ -659,7 +659,7 @@ def test_allow_introspection(): "REVOKE demangle ON *.* FROM robin", settings={"allow_introspection_functions": 1}, ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin" ) diff --git a/tests/integration/test_shutdown_wait_unfinished_queries/configs/users.xml b/tests/integration/test_shutdown_wait_unfinished_queries/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_shutdown_wait_unfinished_queries/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_shutdown_wait_unfinished_queries/test.py b/tests/integration/test_shutdown_wait_unfinished_queries/test.py index ae0710149de..af86b79c387 100644 --- a/tests/integration/test_shutdown_wait_unfinished_queries/test.py +++ b/tests/integration/test_shutdown_wait_unfinished_queries/test.py @@ -2,14 +2,22 @@ import pytest import threading import time +import uuid from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) node_wait_queries = cluster.add_instance( - "node_wait_queries", main_configs=["configs/config_wait.xml"], stay_alive=True + "node_wait_queries", + main_configs=["configs/config_wait.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) node_kill_queries = cluster.add_instance( - "node_kill_queries", main_configs=["configs/config_kill.xml"], stay_alive=True + "node_kill_queries", + main_configs=["configs/config_kill.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) global result @@ -24,32 +32,55 @@ def start_cluster(): cluster.shutdown() -def do_long_query(node): +def do_long_query(node, query_id): global result result = node.query_and_get_answer_with_error( "SELECT sleepEachRow(1) FROM system.numbers LIMIT 10", settings={"send_logs_level": "trace"}, + query_id=query_id, ) def test_shutdown_wait_unfinished_queries(start_cluster): global result - long_query = threading.Thread(target=do_long_query, args=(node_wait_queries,)) + query_id = uuid.uuid4().hex + long_query = threading.Thread( + target=do_long_query, + args=( + node_wait_queries, + query_id, + ), + ) long_query.start() - time.sleep(1) + assert_eq_with_retry( + node_wait_queries, + f"SELECT query_id FROM system.processes WHERE query_id = '{query_id}'", + query_id, + ) node_wait_queries.stop_clickhouse(kill=False) long_query.join() assert result[0].count("0") == 10 - long_query = threading.Thread(target=do_long_query, args=(node_kill_queries,)) + query_id = uuid.uuid4().hex + long_query = threading.Thread( + target=do_long_query, + args=( + node_kill_queries, + query_id, + ), + ) long_query.start() - time.sleep(1) + assert_eq_with_retry( + node_kill_queries, + f"SELECT query_id FROM system.processes WHERE query_id = '{query_id}'", + query_id, + ) node_kill_queries.stop_clickhouse(kill=False) long_query.join() diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index ff2de7491e1..d31457488c1 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -160,6 +160,10 @@ def 
get_ssl_context(cert_name): ) context.verify_mode = ssl.CERT_REQUIRED context.check_hostname = True + # Python 3.10 has removed many ciphers from the cipher suite. + # Hence based on https://github.com/urllib3/urllib3/issues/3100#issuecomment-1671106236 + # we are expanding the list of cipher suites. + context.set_ciphers("DEFAULT") return context @@ -208,7 +212,9 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -314,7 +320,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -334,7 +342,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" diff --git a/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml b/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml new file mode 100644 index 00000000000..ffa4673c9ee --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml @@ -0,0 +1,39 @@ + + + + + + node_0 + 9000 + + + node_1 + 9000 + + + node_2 + 9000 + + + + + + + + node_0 + 9000 + + + + + node_1 + 19000 + + + + + + + simple_cluster + + \ No newline at end of file diff --git a/tests/integration/test_storage_azure_blob_storage/configs/schema_cache.xml b/tests/integration/test_storage_azure_blob_storage/configs/schema_cache.xml new file mode 100644 index 00000000000..e2168ecd06d --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/schema_cache.xml @@ -0,0 +1,3 @@ + + 2 + \ No newline at end of file diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 21f57a67495..3ec0506c525 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -24,7 +24,7 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/named_collections.xml"], + main_configs=["configs/named_collections.xml", "configs/schema_cache.xml"], user_configs=["configs/disable_profilers.xml", "configs/users.xml"], with_azurite=True, ) @@ -35,14 +35,25 @@ def cluster(): cluster.shutdown() -def azure_query(node, query, try_num=10, settings={}): +def azure_query( + node, query, expect_error="false", try_num=10, settings={}, query_on_retry=None +): for i in range(try_num): try: - return node.query(query, settings=settings) + if expect_error == "true": + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", 
"DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", ] retry = False for error in retriable_errors: @@ -53,6 +64,8 @@ def azure_query(node, query, try_num=10, settings={}): break if not retry or i == try_num - 1: raise Exception(ex) + if query_on_retry is not None: + node.query(query_on_retry) continue @@ -611,3 +624,394 @@ def test_filter_using_file(cluster): query = f"select count(*) from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_partition_tf_*.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') WHERE _file='test_partition_tf_3.csv'" assert azure_query(node, query) == "1\n" + + +def test_read_subcolumns(cluster): + node = cluster.instances["node"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\tcont/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" 
+ + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + +def test_read_from_not_existing_container(cluster): + node = cluster.instances["node"] + query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" + expected_err_msg = "container does not exist" + assert expected_err_msg in azure_query(node, query, expect_error="true") + + +def test_function_signatures(cluster): + node = cluster.instances["node"] + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;" + storage_account_url = "http://azurite1:10000/devstoreaccount1" + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)", + ) + + # " - connection_string, container_name, blobpath\n" + query_1 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv')" + assert azure_query(node, query_1) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, structure \n" + query_2 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'column1 UInt32')" + assert azure_query(node, query_2) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format \n" + query_3 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV')" + assert azure_query(node, query_3) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format, compression \n" + query_4 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto')" + assert azure_query(node, query_4) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format, compression, structure \n" + query_5 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto', 'column1 UInt32')" + assert azure_query(node, query_5) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key\n" + query_6 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}')" + assert azure_query(node, query_6) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + query_7 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'column1 UInt32')" + assert azure_query(node, query_7) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + query_8 = f"select * from 
azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV')" + assert azure_query(node, query_8) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + query_9 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto')" + assert azure_query(node, query_9) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n" + query_10 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32')" + assert azure_query(node, query_10) == "1\n2\n3\n" + + +def check_profile_event_for_query(instance, file, profile_event, amount): + instance.query("system flush logs") + query_pattern = f"azureBlobStorage%{file}".replace("'", "\\'") + res = int( + instance.query( + f"select ProfileEvents['{profile_event}'] from system.query_log where query like '%{query_pattern}%' and query not like '%ProfileEvents%' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" + ) + ) + + assert res == amount + + +def check_cache_misses(instance, file, amount=1): + check_profile_event_for_query(instance, file, "SchemaInferenceCacheMisses", amount) + + +def check_cache_hits(instance, file, amount=1): + check_profile_event_for_query(instance, file, "SchemaInferenceCacheHits", amount) + + +def check_cache_invalidations(instance, file, amount=1): + check_profile_event_for_query( + instance, file, "SchemaInferenceCacheInvalidations", amount + ) + + +def check_cache_evictions(instance, file, amount=1): + check_profile_event_for_query( + instance, file, "SchemaInferenceCacheEvictions", amount + ) + + +def check_cache_num_rows_hots(instance, file, amount=1): + check_profile_event_for_query( + instance, file, "SchemaInferenceCacheNumRowsHits", amount + ) + + +def run_describe_query(instance, file, connection_string): + query = f"desc azureBlobStorage('{connection_string}', 'cont', '{file}')" + azure_query(instance, query) + + +def run_count_query(instance, file, connection_string, drop_cache_on_retry=False): + query = f"select count() from azureBlobStorage('{connection_string}', 'cont', '{file}', auto, auto, 'x UInt64')" + if drop_cache_on_retry: + return azure_query( + node=instance, + query=query, + query_on_retry="system drop schema cache for azure", + ) + + return azure_query(instance, query) + + +def check_cache(instance, expected_files): + sources = instance.query("select source from system.schema_inference_cache") + assert sorted(map(lambda x: x.strip().split("/")[-1], sources.split())) == sorted( + expected_files + ) + + +def test_schema_inference_cache(cluster): + node = cluster.instances["node"] + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;" + storage_account_url = "http://azurite1:10000/devstoreaccount1" + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + + node.query("system drop schema cache") + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') select * 
from numbers(100)", + ) + + time.sleep(1) + + run_describe_query(node, "test_cache0.jsonl", connection_string) + check_cache(node, ["test_cache0.jsonl"]) + check_cache_misses(node, "test_cache0.jsonl") + + run_describe_query(node, "test_cache0.jsonl", connection_string) + check_cache_hits(node, "test_cache0.jsonl") + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + + time.sleep(1) + + run_describe_query(node, "test_cache0.jsonl", connection_string) + check_cache_invalidations(node, "test_cache0.jsonl") + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache1.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + run_describe_query(node, "test_cache1.jsonl", connection_string) + check_cache(node, ["test_cache0.jsonl", "test_cache1.jsonl"]) + check_cache_misses(node, "test_cache1.jsonl") + + run_describe_query(node, "test_cache1.jsonl", connection_string) + check_cache_hits(node, "test_cache1.jsonl") + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache2.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + run_describe_query(node, "test_cache2.jsonl", connection_string) + check_cache(node, ["test_cache1.jsonl", "test_cache2.jsonl"]) + check_cache_misses(node, "test_cache2.jsonl") + check_cache_evictions(node, "test_cache2.jsonl") + + run_describe_query(node, "test_cache2.jsonl", connection_string) + check_cache_hits(node, "test_cache2.jsonl") + + run_describe_query(node, "test_cache1.jsonl", connection_string) + check_cache_hits(node, "test_cache1.jsonl") + + run_describe_query(node, "test_cache0.jsonl", connection_string) + check_cache(node, ["test_cache0.jsonl", "test_cache1.jsonl"]) + check_cache_misses(node, "test_cache0.jsonl") + check_cache_evictions(node, "test_cache0.jsonl") + + run_describe_query(node, "test_cache2.jsonl", connection_string) + check_cache(node, ["test_cache0.jsonl", "test_cache2.jsonl"]) + check_cache_misses( + node, + "test_cache2.jsonl", + ) + check_cache_evictions( + node, + "test_cache2.jsonl", + ) + + run_describe_query(node, "test_cache2.jsonl", connection_string) + + check_cache_hits( + node, + "test_cache2.jsonl", + ) + + run_describe_query(node, "test_cache0.jsonl", connection_string) + check_cache_hits( + node, + "test_cache0.jsonl", + ) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache3.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + files = "test_cache{0,1,2,3}.jsonl" + run_describe_query(node, files, connection_string) + check_cache_hits(node, files) + + node.query(f"system drop schema cache for azure") + check_cache(node, []) + + run_describe_query(node, files, connection_string) + check_cache_misses(node, files, 4) + + node.query("system drop schema cache") + check_cache(node, []) + + run_describe_query(node, files, connection_string) + check_cache_misses(node, files, 4) + + node.query("system drop schema cache") + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.csv', 
'{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + res = run_count_query(node, "test_cache0.csv", connection_string) + + assert int(res) == 100 + + check_cache(node, ["test_cache0.csv"]) + check_cache_misses( + node, + "test_cache0.csv", + ) + + res = run_count_query(node, "test_cache0.csv", connection_string) + assert int(res) == 100 + + check_cache_hits( + node, + "test_cache0.csv", + ) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.csv', '{account_name}', '{account_key}') select * from numbers(200) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + res = run_count_query(node, "test_cache0.csv", connection_string) + + assert int(res) == 200 + + check_cache_invalidations( + node, + "test_cache0.csv", + ) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache1.csv', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + res = run_count_query(node, "test_cache1.csv", connection_string) + + assert int(res) == 100 + check_cache(node, ["test_cache0.csv", "test_cache1.csv"]) + check_cache_misses( + node, + "test_cache1.csv", + ) + + res = run_count_query(node, "test_cache1.csv", connection_string) + assert int(res) == 100 + check_cache_hits( + node, + "test_cache1.csv", + ) + + res = run_count_query(node, "test_cache{0,1}.csv", connection_string) + assert int(res) == 300 + check_cache_hits(node, "test_cache{0,1}.csv", 2) + + node.query(f"system drop schema cache for azure") + check_cache(node, []) + + res = run_count_query(node, "test_cache{0,1}.csv", connection_string, True) + assert int(res) == 300 + check_cache_misses(node, "test_cache{0,1}.csv", 2) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache.parquet', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1", + ) + time.sleep(1) + + res = azure_query( + node, + f"select count() from azureBlobStorage('{connection_string}', 'cont', 'test_cache.parquet')", + ) + assert int(res) == 100 + check_cache_misses(node, "test_cache.parquet") + check_cache_hits(node, "test_cache.parquet") + check_cache_num_rows_hots(node, "test_cache.parquet") + + +def test_filtering_by_file_or_path(cluster): + node = cluster.instances["node"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter1.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + ) + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter2.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2", + ) + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter3.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3", + ) + + node.query( + f"select count() from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter*.tsv', 'devstoreaccount1', 
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') where _file = 'test_filter1.tsv'" + ) + + node.query("SYSTEM FLUSH LOGS") + + result = node.query( + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%select%azure%test_filter%' AND type='QueryFinish'" + ) + + assert int(result) == 1 diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py new file mode 100644 index 00000000000..454f1cdf294 --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 + +import gzip +import json +import logging +import os +import io +import random +import threading +import time + +from azure.storage.blob import BlobServiceClient +import helpers.client +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.test_tools import TSV +from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers +from helpers.test_tools import exec_query_with_retry +from test_storage_azure_blob_storage.test import azure_query + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node_0", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.add_instance( + "node_1", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.add_instance( + "node_2", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def get_azure_file_content(filename): + container_name = "cont" + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + container_client = blob_service_client.get_container_client(container_name) + blob_client = container_client.get_blob_client(filename) + download_stream = blob_client.download_blob() + return download_stream.readall().decode("utf-8") + + +def test_select_all(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " + "'auto', 'key UInt64, data String') VALUES (1, 'a'), (2, 'b')", + ) + print(get_azure_file_content("test_cluster_select_all.csv")) + + pure_azure = azure_query( + node, + """ + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto')""", + ) + print(pure_azure) + distributed_azure = azure_query( + node, + """ + SELECT * from azureBlobStorageCluster( + 
'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto')""", + ) + print(distributed_azure) + assert TSV(pure_azure) == TSV(distributed_azure) + + +def test_count(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " + "'auto', 'key UInt64') VALUES (1), (2)", + ) + print(get_azure_file_content("test_cluster_count.csv")) + + pure_azure = azure_query( + node, + """ + SELECT count(*) from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64')""", + ) + print(pure_azure) + distributed_azure = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64')""", + ) + print(distributed_azure) + assert TSV(pure_azure) == TSV(distributed_azure) + + +def test_union_all(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " + "'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + ) + + pure_azure = azure_query( + node, + """ + SELECT * FROM + ( + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + UNION ALL + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + ) + ORDER BY (a) + """, + ) + azure_distributed = azure_query( + node, + """ + SELECT * FROM + ( + SELECT * from azureBlobStorageCluster( + 'simple_cluster', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + UNION ALL + SELECT * from azureBlobStorageCluster( + 'simple_cluster', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + ) + ORDER BY (a) + """, + ) + + assert TSV(pure_azure) == TSV(azure_distributed) + + +def test_skip_unavailable_shards(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + 
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + result = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'cluster_non_existent_port', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + SETTINGS skip_unavailable_shards = 1 + """, + ) + + assert result == "2\n" + + +def test_unset_skip_unavailable_shards(cluster): + # Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards. + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + result = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'cluster_non_existent_port', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + """, + ) + + assert result == "2\n" + + +def test_cluster_with_named_collection(cluster): + node = cluster.instances["node_0"] + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + + pure_azure = azure_query( + node, + """ + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + """, + ) + + azure_cluster = azure_query( + node, + """ + SELECT * from azureBlobStorageCluster( + 'simple_cluster', azure_conf2, container='cont', blob_path='test_cluster_with_named_collection.csv') + """, + ) + + assert TSV(pure_azure) == TSV(azure_cluster) + + +def test_partition_parallel_readig_withcluster(cluster): + node = cluster.instances["node_0"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + partition_by = "column3" + values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)" + filename = "test_tf_{_partition_id}.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", + ) + + assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv") + assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv") + assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv") + + azure_cluster = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'simple_cluster', + azure_conf2, container='cont', blob_path='test_tf_*.csv', format='CSV', compression='auto', structure='column1 UInt32, 
column2 UInt32, column3 UInt32') + """, + ) + + assert azure_cluster == "3\n" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 8ff88791a3a..9af75cf37bd 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -318,7 +318,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\thdfs://hdfs1:9000//file1\n2\tfile2\thdfs://hdfs1:9000//file2\n3\tfile3\thdfs://hdfs1:9000//file3\n" + expected = "1\tfile1\thdfs://hdfs1:9000/file1\n2\tfile2\thdfs://hdfs1:9000/file2\n3\tfile3\thdfs://hdfs1:9000/file3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id" @@ -661,55 +661,40 @@ def test_virtual_columns_2(started_cluster): assert result.strip() == "kek" -def get_profile_event_for_query(node, query, profile_event): +def check_profile_event_for_query(node, file, profile_event, amount=1): node.query("system flush logs") - query = query.replace("'", "\\'") - return int( - node.query( - f"select ProfileEvents['{profile_event}'] from system.query_log where query='{query}' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" + query_pattern = f"hdfs('hdfs://hdfs1:9000/{file}'".replace("'", "\\'") + assert ( + int( + node.query( + f"select ProfileEvents['{profile_event}'] from system.query_log where query like '%{query_pattern}%' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" + ) ) + == amount ) def check_cache_misses(node1, file, amount=1): - assert ( - get_profile_event_for_query( - node1, - f"desc hdfs('hdfs://hdfs1:9000/{file}')", - "SchemaInferenceCacheMisses", - ) - == amount - ) + check_profile_event_for_query(node1, file, "SchemaInferenceCacheMisses", amount) def check_cache_hits(node1, file, amount=1): - assert ( - get_profile_event_for_query( - node1, f"desc hdfs('hdfs://hdfs1:9000/{file}')", "SchemaInferenceCacheHits" - ) - == amount - ) + check_profile_event_for_query(node1, file, "SchemaInferenceCacheHits", amount) def check_cache_invalidations(node1, file, amount=1): - assert ( - get_profile_event_for_query( - node1, - f"desc hdfs('hdfs://hdfs1:9000/{file}')", - "SchemaInferenceCacheInvalidations", - ) - == amount + check_profile_event_for_query( + node1, file, "SchemaInferenceCacheInvalidations", amount ) def check_cache_evictions(node1, file, amount=1): - assert ( - get_profile_event_for_query( - node1, - f"desc hdfs('hdfs://hdfs1:9000/{file}')", - "SchemaInferenceCacheEvictions", - ) - == amount + check_profile_event_for_query(node1, file, "SchemaInferenceCacheEvictions", amount) + + +def check_cache_num_rows_hits(node1, file, amount=1): + check_profile_event_for_query( + node1, file, "SchemaInferenceCacheNumRowsHits", amount ) @@ -725,6 +710,11 @@ def run_describe_query(node, file): node.query(query) +def run_count_query(node, file): + query = f"select count() from hdfs('hdfs://hdfs1:9000/{file}', auto, 'x UInt64')" + return node.query(query) + + def test_schema_inference_cache(started_cluster): node1.query("system drop schema cache") node1.query( @@ -812,6 +802,72 @@ def test_schema_inference_cache(started_cluster): run_describe_query(node1, files) check_cache_misses(node1, files, 4) + node1.query("system drop schema cache") + check_cache(node1, []) + + node1.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_cache0.csv') select * from 
numbers(100) settings hdfs_truncate_on_insert=1" + ) + time.sleep(1) + + res = run_count_query(node1, "test_cache0.csv") + assert int(res) == 100 + check_cache(node1, ["test_cache0.csv"]) + check_cache_misses(node1, "test_cache0.csv") + + res = run_count_query(node1, "test_cache0.csv") + assert int(res) == 100 + check_cache_hits(node1, "test_cache0.csv") + + node1.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_cache0.csv') select * from numbers(200) settings hdfs_truncate_on_insert=1" + ) + time.sleep(1) + + res = run_count_query(node1, "test_cache0.csv") + assert int(res) == 200 + check_cache_invalidations(node1, "test_cache0.csv") + + node1.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_cache1.csv') select * from numbers(100) settings hdfs_truncate_on_insert=1" + ) + time.sleep(1) + + res = run_count_query(node1, "test_cache1.csv") + assert int(res) == 100 + check_cache(node1, ["test_cache0.csv", "test_cache1.csv"]) + check_cache_misses(node1, "test_cache1.csv") + + res = run_count_query(node1, "test_cache1.csv") + assert int(res) == 100 + check_cache_hits(node1, "test_cache1.csv") + + res = run_count_query(node1, "test_cache{0,1}.csv") + assert int(res) == 300 + check_cache_hits(node1, "test_cache{0,1}.csv", 2) + + node1.query(f"system drop schema cache for hdfs") + check_cache(node1, []) + + res = run_count_query(node1, "test_cache{0,1}.csv") + assert int(res) == 300 + check_cache_misses(node1, "test_cache{0,1}.csv", 2) + + node1.query(f"system drop schema cache for hdfs") + check_cache(node1, []) + + node1.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_cache.parquet') select * from numbers(100) settings hdfs_truncate_on_insert=1" + ) + time.sleep(1) + res = node1.query( + f"select count() from hdfs('hdfs://hdfs1:9000/test_cache.parquet')" + ) + assert int(res) == 100 + check_cache_misses(node1, "test_cache.parquet") + check_cache_hits(node1, "test_cache.parquet") + check_cache_num_rows_hits(node1, "test_cache.parquet") + def test_hdfsCluster_skip_unavailable_shards(started_cluster): # Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards. 
@@ -892,6 +948,54 @@ def test_skip_empty_files(started_cluster): assert int(res) == 0 +def test_read_subcolumns(started_cluster): + node = started_cluster.instances["node1"] + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res + == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/configs/kafka.xml b/tests/integration/test_storage_kafka/configs/kafka.xml index 062c98a2ac7..c6075aff715 100644 --- a/tests/integration/test_storage_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kafka/configs/kafka.xml @@ -10,6 +10,10 @@ --> cgrp,consumer,topic,protocol + + + 600 + consumer_hang diff --git a/tests/integration/test_storage_kafka/configs/users.xml b/tests/integration/test_storage_kafka/configs/users.xml index 3168de649f8..d13651d5f92 100644 --- a/tests/integration/test_storage_kafka/configs/users.xml +++ b/tests/integration/test_storage_kafka/configs/users.xml @@ -4,6 +4,7 @@ 1 0 + 0 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index d0686c7c36f..b1191af60b7 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -444,6 +444,34 @@ def test_kafka_settings_new_syntax(kafka_cluster): assert members[0]["client_id"] == "instance test 1234" +def test_kafka_settings_predefined_macros(kafka_cluster): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = '{kafka_broker}:19092', + kafka_topic_list = '{database}_{table}_topic', + kafka_group_name = '{database}_{table}_group', + kafka_format = '{kafka_format_json_each_row}', + kafka_row_delimiter = '\\n', + kafka_commit_on_select = 1, + kafka_client_id = '{database}_{table} test 1234', + kafka_skip_broken_messages = 1; + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, "test_kafka_topic", messages) + + result = instance.query("SELECT * FROM test.kafka", 
ignore_error=True) + kafka_check_result(result, True) + + members = describe_consumer_group(kafka_cluster, "test_kafka_group") + assert members[0]["client_id"] == "test_kafka test 1234" + + def test_kafka_json_as_string(kafka_cluster): kafka_produce( kafka_cluster, @@ -843,24 +871,7 @@ def test_kafka_formats(kafka_cluster): extra_settings=format_opts.get("extra_settings") or "", ) ) - - instance.wait_for_log_line( - "kafka.*Committed offset [0-9]+.*format_tests_", - repetitions=len(all_formats.keys()), - look_behind_lines=12000, - ) - - for format_name, format_opts in list(all_formats.items()): - logging.debug(("Checking {}".format(format_name))) - topic_name = f"format_tests_{format_name}" - # shift offsets by 1 if format supports empty value - offsets = ( - [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] - ) - result = instance.query( - "SELECT * FROM test.kafka_{format_name}_mv;".format(format_name=format_name) - ) - expected = """\ + raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} 2 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -878,7 +889,27 @@ def test_kafka_formats(kafka_cluster): 14 0 AM 0.5 1 {topic_name} 0 {offset_1} 15 0 AM 0.5 1 {topic_name} 0 {offset_1} 0 0 AM 0.5 1 {topic_name} 0 {offset_2} -""".format( +""" + + expected_rows_count = raw_expected.count("\n") + instance.query_with_retry( + f"SELECT * FROM test.kafka_{list(all_formats.keys())[-1]}_mv;", + retry_count=30, + sleep_time=1, + check_callback=lambda res: res.count("\n") == expected_rows_count, + ) + + for format_name, format_opts in list(all_formats.items()): + logging.debug(("Checking {}".format(format_name))) + topic_name = f"format_tests_{format_name}" + # shift offsets by 1 if format supports empty value + offsets = ( + [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] + ) + result = instance.query( + "SELECT * FROM test.kafka_{format_name}_mv;".format(format_name=format_name) + ) + expected = raw_expected.format( topic_name=topic_name, offset_0=offsets[0], offset_1=offsets[1], @@ -1155,6 +1186,7 @@ def test_kafka_consumer_hang2(kafka_cluster): instance.query( """ DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka @@ -2916,7 +2948,7 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): # while materialized view is working to inject zookeeper failure pm.drop_instance_zk_connections(instance) instance.wait_for_log_line( - "Error.*(session has been expired|Connection loss).*while pushing to view" + "Error.*(Connection loss|Coordination::Exception).*while pushing to view" ) pm.heal_all() instance.wait_for_log_line("Committed offset 22") @@ -3755,19 +3787,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): ) ) - for format_name, format_opts in list(all_formats.items()): - logging.debug("Checking {format_name}") - topic_name = f"{topic_name_prefix}{format_name}" - # shift offsets by 1 if format supports empty value - offsets = ( - [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] - ) - result = instance.query( - "SELECT * FROM test.kafka_data_{format_name}_mv;".format( - format_name=format_name - ) - ) - expected = """\ + raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} 2 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -3785,7 +3805,29 @@ def test_kafka_formats_with_broken_message(kafka_cluster): 14 0 AM 0.5 1 {topic_name} 0 {offset_1} 15 0 AM 0.5 1 
{topic_name} 0 {offset_1} 0 0 AM 0.5 1 {topic_name} 0 {offset_2} -""".format( +""" + + expected_rows_count = raw_expected.count("\n") + instance.query_with_retry( + f"SELECT * FROM test.kafka_data_{list(all_formats.keys())[-1]}_mv;", + retry_count=30, + sleep_time=1, + check_callback=lambda res: res.count("\n") == expected_rows_count, + ) + + for format_name, format_opts in list(all_formats.items()): + logging.debug(f"Checking {format_name}") + topic_name = f"{topic_name_prefix}{format_name}" + # shift offsets by 1 if format supports empty value + offsets = ( + [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] + ) + result = instance.query( + "SELECT * FROM test.kafka_data_{format_name}_mv;".format( + format_name=format_name + ) + ) + expected = raw_expected.format( topic_name=topic_name, offset_0=offsets[0], offset_1=offsets[1], @@ -4504,6 +4546,294 @@ def test_block_based_formats_2(kafka_cluster): kafka_delete_topic(admin_client, format_name) +def test_system_kafka_consumers(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "system_kafka_cons" + kafka_create_topic(admin_client, topic) + + # Check that format_csv_delimiter parameter works now - as part of all available format settings. + kafka_produce( + kafka_cluster, + topic, + ["1|foo", "2|bar", "42|answer", "100|multi\n101|row\n103|message"], + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (a UInt64, b String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'CSV', + kafka_row_delimiter = '\\n', + format_csv_delimiter = '|'; + """ + ) + + result = instance.query("SELECT * FROM test.kafka ORDER BY a;") + + result_system_kafka_consumers = instance.query( + """ + create or replace function stable_timestamp as + (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + + SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, + assignments.current_offset, + if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, + if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, + stable_timestamp(last_poll_time) as last_poll_time_, num_messages_read, stable_timestamp(last_commit_time) as last_commit_time_, + num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, + num_rebalance_revocations, num_rebalance_assignments, is_currently_used + FROM system.kafka_consumers WHERE database='test' and table='kafka' format Vertical; + """ + ) + logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") + assert ( + result_system_kafka_consumers + == """Row 1: +────── +database: test +table: kafka +length(consumer_id): 67 +assignments.topic: ['system_kafka_cons'] +assignments.partition_id: [0] +assignments.current_offset: [4] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +num_messages_read: 4 +last_commit_time_: now +num_commits: 1 +last_rebalance_time_: never +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 0 +""" + ) + + instance.query("DROP TABLE test.kafka") + kafka_delete_topic(admin_client, topic) + + +def test_system_kafka_consumers_rebalance(kafka_cluster, max_retries=15): + # based on 
test_kafka_consumer_hang2 + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + topic = "system_kafka_cons2" + kafka_create_topic(admin_client, topic, num_partitions=2) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka2 (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic}', + kafka_format = 'JSONEachRow'; + """ + ) + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) + time.sleep(3) + + # first consumer subscribe the topic, try to poll some data, and go to rest + instance.query("SELECT * FROM test.kafka") + + # second consumer do the same leading to rebalance in the first + # consumer, try to poll some data + instance.query("SELECT * FROM test.kafka2") + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 10, "value": 10}), partition=1) + time.sleep(3) + + instance.query("SELECT * FROM test.kafka") + instance.query("SELECT * FROM test.kafka2") + instance.query("SELECT * FROM test.kafka") + instance.query("SELECT * FROM test.kafka2") + + result_system_kafka_consumers = instance.query( + """ + create or replace function stable_timestamp as + (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, + assignments.current_offset, + if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, + if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, + stable_timestamp(last_poll_time) as last_poll_time_, num_messages_read, stable_timestamp(last_commit_time) as last_commit_time_, + num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, + num_rebalance_revocations, num_rebalance_assignments, is_currently_used + FROM system.kafka_consumers WHERE database='test' and table IN ('kafka', 'kafka2') format Vertical; + """ + ) + logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") + assert ( + result_system_kafka_consumers + == """Row 1: +────── +database: test +table: kafka +length(consumer_id): 67 +assignments.topic: ['system_kafka_cons2'] +assignments.partition_id: [0] +assignments.current_offset: [2] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +num_messages_read: 4 +last_commit_time_: now +num_commits: 2 +last_rebalance_time_: now +num_rebalance_revocations: 1 +num_rebalance_assignments: 2 +is_currently_used: 0 + +Row 2: +────── +database: test +table: kafka2 +length(consumer_id): 68 +assignments.topic: ['system_kafka_cons2'] +assignments.partition_id: [1] +assignments.current_offset: [2] +last_exception_time_: never 
+last_exception_: no exception +last_poll_time_: now +num_messages_read: 1 +last_commit_time_: now +num_commits: 1 +last_rebalance_time_: never +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 0 +""" + ) + + instance.query("DROP TABLE test.kafka") + instance.query("DROP TABLE test.kafka2") + + kafka_delete_topic(admin_client, topic) + + +def test_system_kafka_consumers_rebalance_mv(kafka_cluster, max_retries=15): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + topic = "system_kafka_cons_mv" + kafka_create_topic(admin_client, topic, num_partitions=2) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; + DROP TABLE IF EXISTS test.kafka_persistent; + DROP TABLE IF EXISTS test.kafka_persistent2; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka2 (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic}', + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka_persistent (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.kafka_persistent2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + + CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.kafka_persistent AS + SELECT key, value + FROM test.kafka; + + CREATE MATERIALIZED VIEW test.persistent_kafka_mv2 TO test.kafka_persistent2 AS + SELECT key, value + FROM test.kafka2; + """ + ) + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) + time.sleep(3) + + retries = 0 + result_rdkafka_stat = "" + while True: + result_rdkafka_stat = instance.query( + """ + SELECT table, JSONExtractString(rdkafka_stat, 'type') + FROM system.kafka_consumers WHERE database='test' and table = 'kafka' format Vertical; + """ + ) + if result_rdkafka_stat.find("consumer") or retries > max_retries: + break + retries += 1 + time.sleep(1) + + assert ( + result_rdkafka_stat + == """Row 1: +────── +table: kafka +JSONExtractString(rdkafka_stat, 'type'): consumer +""" + ) + + instance.query("DROP TABLE test.kafka") + instance.query("DROP TABLE test.kafka2") + instance.query("DROP TABLE test.kafka_persistent") + instance.query("DROP TABLE test.kafka_persistent2") + instance.query("DROP TABLE test.persistent_kafka_mv") + instance.query("DROP TABLE test.persistent_kafka_mv2") + + kafka_delete_topic(admin_client, topic) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto b/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto deleted file mode 100755 index 090ed917cdd..00000000000 --- a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto +++ /dev/null @@ -1,6 +0,0 @@ -syntax = "proto3"; - - message ProtoKeyValue { - uint64 key = 1; - string value = 2; - } diff --git 
a/tests/integration/test_storage_nats/configs/macros.xml b/tests/integration/test_storage_nats/configs/macros.xml deleted file mode 100644 index 4aa547e049e..00000000000 --- a/tests/integration/test_storage_nats/configs/macros.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - nats1:4444 - macro - JSONEachRow - - diff --git a/tests/integration/test_storage_nats/configs/named_collection.xml b/tests/integration/test_storage_nats/configs/named_collection.xml deleted file mode 100644 index 15817f321f0..00000000000 --- a/tests/integration/test_storage_nats/configs/named_collection.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - nats1:4444 - named - JSONEachRow - 111 - 12 - click - house - - - diff --git a/tests/integration/test_storage_nats/configs/nats.xml b/tests/integration/test_storage_nats/configs/nats.xml deleted file mode 100644 index 0a8be9122ad..00000000000 --- a/tests/integration/test_storage_nats/configs/nats.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - click - house - - diff --git a/tests/integration/test_storage_nats/nats_certs.sh b/tests/integration/test_storage_nats/nats_certs.sh deleted file mode 100755 index 689221c39e4..00000000000 --- a/tests/integration/test_storage_nats/nats_certs.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -euxo pipefail - -mkdir -p "${NATS_CERT_DIR}/ca" -mkdir -p "${NATS_CERT_DIR}/nats" -openssl req -newkey rsa:4096 -x509 -days 365 -nodes -batch -keyout "${NATS_CERT_DIR}/ca/ca-key.pem" -out "${NATS_CERT_DIR}/ca/ca-cert.pem" -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=ca" -openssl req -newkey rsa:4096 -nodes -batch -keyout "${NATS_CERT_DIR}/nats/server-key.pem" -out "${NATS_CERT_DIR}/nats/server-req.pem" -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=server" -openssl x509 -req -days 365 -in "${NATS_CERT_DIR}/nats/server-req.pem" -CA "${NATS_CERT_DIR}/ca/ca-cert.pem" -CAkey "${NATS_CERT_DIR}/ca/ca-key.pem" -CAcreateserial -out "${NATS_CERT_DIR}/nats/server-cert.pem" -extfile <( -cat <<-EOF -subjectAltName = DNS:localhost, DNS:nats1 -EOF -) -rm -f "${NATS_CERT_DIR}/nats/server-req.pem" diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py deleted file mode 100644 index e9e5cb72363..00000000000 --- a/tests/integration/test_storage_nats/nats_pb2.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: clickhouse_path/format_schemas/nats.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n)clickhouse_path/format_schemas/nats.proto"+\n\rProtoKeyValue\x12\x0b\n\x03key\x18\x01 \x01(\x04\x12\r\n\x05value\x18\x02 \x01(\tb\x06proto3' -) - - -_PROTOKEYVALUE = DESCRIPTOR.message_types_by_name["ProtoKeyValue"] -ProtoKeyValue = _reflection.GeneratedProtocolMessageType( - "ProtoKeyValue", - (_message.Message,), - { - "DESCRIPTOR": _PROTOKEYVALUE, - "__module__": "clickhouse_path.format_schemas.nats_pb2" - # @@protoc_insertion_point(class_scope:ProtoKeyValue) - }, -) -_sym_db.RegisterMessage(ProtoKeyValue) - -if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - _PROTOKEYVALUE._serialized_start = 45 - _PROTOKEYVALUE._serialized_end = 88 -# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py deleted file mode 100644 index 4d7e4cf813d..00000000000 --- a/tests/integration/test_storage_nats/test.py +++ /dev/null @@ -1,1875 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/39185 - -pytestmark = pytest.mark.skip - -import json -import os.path as p -import random -import subprocess -import threading -import logging -import time -from random import randrange -import math - -import asyncio -from google.protobuf.internal.encoder import _VarintBytes -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, check_nats_is_available, nats_connect_ssl -from helpers.test_tools import TSV - -from . 
import nats_pb2 - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=[ - "configs/nats.xml", - "configs/macros.xml", - "configs/named_collection.xml", - ], - user_configs=["configs/users.xml"], - with_nats=True, - clickhouse_path_dir="clickhouse_path", -) - - -# Helpers - - -def wait_nats_to_start(nats_port, ssl_ctx=None, timeout=180): - start = time.time() - while time.time() - start < timeout: - try: - if asyncio.run(check_nats_is_available(nats_port, ssl_ctx=ssl_ctx)): - logging.debug("NATS is available") - return - time.sleep(0.5) - except Exception as ex: - logging.debug("Can't connect to NATS " + str(ex)) - time.sleep(0.5) - - -def nats_check_result(result, check=False, ref_file="test_nats_json.reference"): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -def kill_nats(nats_id): - p = subprocess.Popen(("docker", "stop", nats_id), stdout=subprocess.PIPE) - p.communicate() - return p.returncode == 0 - - -def revive_nats(nats_id, nats_port): - p = subprocess.Popen(("docker", "start", nats_id), stdout=subprocess.PIPE) - p.communicate() - wait_nats_to_start(nats_port) - - -# Fixtures - - -@pytest.fixture(scope="module") -def nats_cluster(): - try: - cluster.start() - logging.debug("nats_id is {}".format(instance.cluster.nats_docker_id)) - instance.query("CREATE DATABASE test") - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.fixture(autouse=True) -def nats_setup_teardown(): - print("NATS is available - running test") - yield # run test - instance.query("DROP DATABASE test SYNC") - instance.query("CREATE DATABASE test") - - -# Tests - - -async def nats_produce_messages(cluster_inst, subject, messages=(), bytes=None): - nc = await nats_connect_ssl( - cluster_inst.nats_port, - user="click", - password="house", - ssl_ctx=cluster_inst.nats_ssl_context, - ) - logging.debug("NATS connection status: " + str(nc.is_connected)) - - for message in messages: - await nc.publish(subject, message.encode()) - if bytes is not None: - await nc.publish(subject, bytes) - logging.debug("Finished publising to " + subject) - - await nc.close() - return messages - - -def check_table_is_ready(instance, table_name): - try: - instance.query("SELECT * FROM {}".format(table_name)) - return True - except Exception: - return False - - -def test_nats_select(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'select', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "select", messages)) - - # The order of messages in select * from test.nats is not guaranteed, so sleep to collect everything in one select - time.sleep(1) - - result = "" - while True: - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_select_empty(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'empty', - nats_format = 
'TSV', - nats_row_delimiter = '\\n'; - """ - ) - - assert int(instance.query("SELECT count() FROM test.nats")) == 0 - - -def test_nats_json_without_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'json', - nats_format = 'JSONEachRow'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = "" - for i in range(25): - messages += json.dumps({"key": i, "value": i}) + "\n" - - all_messages = [messages] - asyncio.run(nats_produce_messages(nats_cluster, "json", all_messages)) - - messages = "" - for i in range(25, 50): - messages += json.dumps({"key": i, "value": i}) + "\n" - all_messages = [messages] - asyncio.run(nats_produce_messages(nats_cluster, "json", all_messages)) - - time.sleep(1) - - result = "" - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_csv_with_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'csv', - nats_format = 'CSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append("{i}, {i}".format(i=i)) - - asyncio.run(nats_produce_messages(nats_cluster, "csv", messages)) - - time.sleep(1) - - result = "" - for _ in range(60): - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_tsv_with_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'tsv', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append("{i}\t{i}".format(i=i)) - - asyncio.run(nats_produce_messages(nats_cluster, "tsv", messages)) - - result = "" - for _ in range(60): - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -# - - -def test_nats_macros(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = '{nats_url}', - nats_subjects = '{nats_subjects}', - nats_format = '{nats_format}' - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - message = "" - for i in range(50): - message += json.dumps({"key": i, "value": i}) + "\n" - asyncio.run(nats_produce_messages(nats_cluster, "macro", [message])) - - time.sleep(1) - - result = "" - for _ in range(60): - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", 
ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_materialized_view(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mv', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.nats group by (key, value); - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view2 ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_materialized_view_with_subquery(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mvsq', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM (SELECT * FROM test.nats); - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mvsq", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_many_materialized_views(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view1; - DROP TABLE IF EXISTS test.view2; - DROP TABLE IF EXISTS test.consumer1; - DROP TABLE IF EXISTS test.consumer2; - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mmv', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view1 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS - SELECT * FROM test.nats; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) 
- asyncio.run(nats_produce_messages(nats_cluster, "mmv", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result1 = instance.query("SELECT * FROM test.view1 ORDER BY key") - result2 = instance.query("SELECT * FROM test.view2 ORDER BY key") - if nats_check_result(result1) and nats_check_result(result2): - break - - instance.query( - """ - DROP TABLE test.consumer1; - DROP TABLE test.consumer2; - DROP TABLE test.view1; - DROP TABLE test.view2; - """ - ) - - nats_check_result(result1, True) - nats_check_result(result2, True) - - -def test_nats_protobuf(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value String) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'pb', - nats_format = 'Protobuf', - nats_schema = 'nats.proto:ProtoKeyValue'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - data = b"" - for i in range(0, 20): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - data = b"" - for i in range(20, 21): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - data = b"" - for i in range(21, 50): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - - result = "" - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_big_message(nats_cluster): - # Create batchs of messages of size ~100Kb - nats_messages = 1000 - batch_messages = 1000 - messages = [ - json.dumps({"key": i, "value": "x" * 100}) * batch_messages - for i in range(nats_messages) - ] - - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value String) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'big', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - asyncio.run(nats_produce_messages(nats_cluster, "big", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - if int(result) == batch_messages * nats_messages: - break - - assert ( - int(result) == nats_messages * batch_messages - ), "ClickHouse lost some messages: {}".format(result) - - -def test_nats_mv_combo(nats_cluster): - NUM_MV = 5 - NUM_CONSUMERS = 4 - - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - 
SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'combo', - nats_num_consumers = {}, - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """.format( - NUM_CONSUMERS - ) - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - for mv_id in range(NUM_MV): - instance.query( - """ - DROP TABLE IF EXISTS test.combo_{0}; - DROP TABLE IF EXISTS test.combo_{0}_mv; - CREATE TABLE test.combo_{0} (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.combo_{0}_mv TO test.combo_{0} AS - SELECT * FROM test.nats; - """.format( - mv_id - ) - ) - - time.sleep(2) - - i = [0] - messages_num = 10000 - - def produce(): - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - asyncio.run(nats_produce_messages(nats_cluster, "combo", messages)) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - while True: - result = 0 - for mv_id in range(NUM_MV): - result += int( - instance.query("SELECT count() FROM test.combo_{0}".format(mv_id)) - ) - if int(result) == messages_num * threads_num * NUM_MV: - break - time.sleep(1) - - for thread in threads: - thread.join() - - for mv_id in range(NUM_MV): - instance.query( - """ - DROP TABLE test.combo_{0}_mv; - DROP TABLE test.combo_{0}; - """.format( - mv_id - ) - ) - - assert ( - int(result) == messages_num * threads_num * NUM_MV - ), "ClickHouse lost some messages: {}".format(result) - - -def test_nats_insert(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'insert', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("insert") - await sub.unsubscribe(50) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - while True: - try: - instance.query("INSERT INTO test.nats VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e): - continue - else: - raise - thread.join() - - result = "\n".join(insert_messages) - nats_check_result(result, True) - - -def test_nats_many_subjects_insert_wrong(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'insert1,insert2.>,insert3.*.foo', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - # no subject specified - instance.query_and_get_error("INSERT INTO test.nats VALUES {}".format(values)) - - # can't insert into wildcard subjects - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert2.>' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.*.foo' VALUES {}".format( - values - ) - ) - - # specified subject is not among engine's subjects - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert4' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.foo.baz' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='foo.insert2' VALUES {}".format( - values - ) - ) - - -def test_nats_many_subjects_insert_right(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'right_insert1,right_insert2', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("right_insert1") - await sub.unsubscribe(50) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - while True: - try: - instance.query( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='right_insert1' VALUES {}".format( - values - ) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e): - continue - else: - raise - thread.join() - - result = "\n".join(insert_messages) - nats_check_result(result, True) - - -def test_nats_many_inserts(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats_many; - DROP TABLE IF EXISTS test.nats_consume; - DROP TABLE IF EXISTS test.view_many; - DROP TABLE IF EXISTS test.consumer_many; - CREATE TABLE test.nats_many (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_inserts', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.nats_consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_inserts', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view_many (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS - SELECT * FROM test.nats_consume; - """ - ) - while not check_table_is_ready(instance, "test.nats_consume"): - logging.debug("Table test.nats_consume is not yet ready") - time.sleep(0.5) - - messages_num = 10000 - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - def insert(): - while True: - try: - instance.query("INSERT INTO test.nats_many VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - threads = [] - threads_num = 10 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - for thread in threads: - thread.join() - - time_limit_sec = 300 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT count() FROM test.view_many") - print(result, messages_num * threads_num) - if int(result) >= messages_num * threads_num: - break - time.sleep(1) - - instance.query( - """ - DROP TABLE test.nats_consume; - DROP TABLE test.nats_many; - DROP TABLE test.consumer_many; - DROP TABLE test.view_many; - """ - ) - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages or got duplicated ones. 
Total count: {}".format( - result - ) - - -def test_nats_overloaded_insert(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view_overload; - DROP TABLE IF EXISTS test.consumer_overload; - DROP TABLE IF EXISTS test.nats_consume; - CREATE TABLE test.nats_consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'over', - nats_num_consumers = 5, - nats_max_block_size = 10000, - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.nats_overload (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'over', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view_overload (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3, - cleanup_thread_preferred_points_per_iteration=0; - CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS - SELECT * FROM test.nats_consume; - """ - ) - while not check_table_is_ready(instance, "test.nats_consume"): - logging.debug("Table test.nats_consume is not yet ready") - time.sleep(0.5) - - messages_num = 100000 - - def insert(): - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - while True: - try: - instance.query( - "INSERT INTO test.nats_overload VALUES {}".format(values) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - threads = [] - threads_num = 5 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - time_limit_sec = 300 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT count() FROM test.view_overload") - time.sleep(1) - if int(result) >= messages_num * threads_num: - break - - instance.query( - """ - DROP TABLE test.consumer_overload; - DROP TABLE test.view_overload; - DROP TABLE test.nats_consume; - DROP TABLE test.nats_overload; - """ - ) - - for thread in threads: - thread.join() - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages or got duplicated ones. 
Total count: {}".format( - result - ) - - -def test_nats_virtual_column(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats_virtuals (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'virtuals', - nats_format = 'JSONEachRow'; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT value, key, _subject FROM test.nats_virtuals; - """ - ) - while not check_table_is_ready(instance, "test.nats_virtuals"): - logging.debug("Table test.nats_virtuals is not yet ready") - time.sleep(0.5) - - message_num = 10 - i = 0 - messages = [] - for _ in range(message_num): - messages.append(json.dumps({"key": i, "value": i})) - i += 1 - - asyncio.run(nats_produce_messages(nats_cluster, "virtuals", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == message_num: - break - - result = instance.query( - """ - SELECT key, value, _subject - FROM test.view ORDER BY key - """ - ) - - expected = """\ -0 0 virtuals -1 1 virtuals -2 2 virtuals -3 3 virtuals -4 4 virtuals -5 5 virtuals -6 6 virtuals -7 7 virtuals -8 8 virtuals -9 9 virtuals -""" - - instance.query( - """ - DROP TABLE test.nats_virtuals; - DROP TABLE test.view; - """ - ) - - assert TSV(result) == TSV(expected) - - -def test_nats_virtual_column_with_materialized_view(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats_virtuals_mv (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'virtuals_mv', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value UInt64, subject String) ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _subject as subject - FROM test.nats_virtuals_mv; - """ - ) - while not check_table_is_ready(instance, "test.nats_virtuals_mv"): - logging.debug("Table test.nats_virtuals_mv is not yet ready") - time.sleep(0.5) - - message_num = 10 - i = 0 - messages = [] - for _ in range(message_num): - messages.append(json.dumps({"key": i, "value": i})) - i += 1 - - asyncio.run(nats_produce_messages(nats_cluster, "virtuals_mv", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == message_num: - break - - result = instance.query("SELECT key, value, subject FROM test.view ORDER BY key") - expected = """\ -0 0 virtuals_mv -1 1 virtuals_mv -2 2 virtuals_mv -3 3 virtuals_mv -4 4 virtuals_mv -5 5 virtuals_mv -6 6 virtuals_mv -7 7 virtuals_mv -8 8 virtuals_mv -9 9 virtuals_mv -""" - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - DROP TABLE test.nats_virtuals_mv - """ - ) - - assert TSV(result) == TSV(expected) - - -def test_nats_many_consumers_to_each_queue(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.destination; - CREATE TABLE test.destination(key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - num_tables = 4 - for table_id in range(num_tables): - print(("Setting up table {}".format(table_id))) - instance.query( - """ - DROP TABLE IF EXISTS test.many_consumers_{0}; - DROP TABLE IF EXISTS test.many_consumers_{0}_mv; - CREATE TABLE test.many_consumers_{0} (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_consumers', - nats_num_consumers = 2, - nats_queue_group = 'many_consumers', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.many_consumers_{0}_mv TO 
test.destination AS - SELECT key, value FROM test.many_consumers_{0}; - """.format( - table_id - ) - ) - while not check_table_is_ready( - instance, "test.many_consumers_{}".format(table_id) - ): - logging.debug( - "Table test.many_consumers_{} is not yet ready".format(table_id) - ) - time.sleep(0.5) - - i = [0] - messages_num = 1000 - - def produce(): - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - asyncio.run(nats_produce_messages(nats_cluster, "many_consumers", messages)) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - result1 = "" - while True: - result1 = instance.query("SELECT count() FROM test.destination") - time.sleep(1) - if int(result1) == messages_num * threads_num: - break - - for thread in threads: - thread.join() - - for consumer_id in range(num_tables): - instance.query( - """ - DROP TABLE test.many_consumers_{0}; - DROP TABLE test.many_consumers_{0}_mv; - """.format( - consumer_id - ) - ) - - instance.query( - """ - DROP TABLE test.destination; - """ - ) - - assert ( - int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result1) - - -def test_nats_restore_failed_connection_without_losses_on_write(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.consume; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE TABLE test.consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'producer_reconnect', - nats_format = 'JSONEachRow', - nats_num_consumers = 2, - nats_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.consume; - DROP TABLE IF EXISTS test.producer_reconnect; - CREATE TABLE test.producer_reconnect (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'producer_reconnect', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.consume"): - logging.debug("Table test.consume is not yet ready") - time.sleep(0.5) - - messages_num = 100000 - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - while True: - try: - instance.query( - "INSERT INTO test.producer_reconnect VALUES {}".format(values) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e): - continue - else: - raise - - while int(instance.query("SELECT count() FROM test.view")) == 0: - time.sleep(0.1) - - kill_nats(nats_cluster.nats_docker_id) - time.sleep(4) - revive_nats(nats_cluster.nats_docker_id, nats_cluster.nats_port) - - while True: - result = instance.query("SELECT count(DISTINCT key) FROM test.view") - time.sleep(1) - if int(result) == messages_num: - break - - instance.query( - """ - DROP TABLE test.consume; - DROP TABLE test.producer_reconnect; - """ - ) - - assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( - result - ) - - -def test_nats_no_connection_at_startup_1(nats_cluster): - # no connection when table is initialized - nats_cluster.pause_container("nats1") - instance.query_and_get_error( - """ - CREATE TABLE test.cs (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'cs', - nats_format = 'JSONEachRow', - nats_num_consumers = '5', - nats_row_delimiter = '\\n'; - """ - ) - nats_cluster.unpause_container("nats1") - - -def test_nats_no_connection_at_startup_2(nats_cluster): - instance.query( - """ - CREATE TABLE test.cs (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'cs', - nats_format = 'JSONEachRow', - nats_num_consumers = '5', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.cs; - """ - ) - - instance.query("DETACH TABLE test.cs") - nats_cluster.pause_container("nats1") - instance.query("ATTACH TABLE test.cs") - nats_cluster.unpause_container("nats1") - while not check_table_is_ready(instance, "test.cs"): - logging.debug("Table test.cs is not yet ready") - time.sleep(0.5) - - messages_num = 1000 - messages = [] - for i in range(messages_num): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "cs", messages)) - - for _ in range(20): - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == messages_num: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.cs; - """ - ) - - assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( - result - ) - - -def test_nats_format_factory_settings(nats_cluster): - instance.query( - """ - CREATE TABLE test.format_settings ( - id String, date DateTime - ) ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'format_settings', - nats_format = 'JSONEachRow', - date_time_input_format = 'best_effort'; - """ - ) - while not check_table_is_ready(instance, "test.format_settings"): - logging.debug("Table test.format_settings is not yet ready") - time.sleep(0.5) - - message = json.dumps( - {"id": "format_settings_test", "date": "2021-01-19T14:42:33.1829214Z"} - ) - expected = instance.query( - """SELECT parseDateTimeBestEffort(CAST('2021-01-19T14:42:33.1829214Z', 'String'))""" - ) - - asyncio.run(nats_produce_messages(nats_cluster, "format_settings", [message])) - - while True: - result = instance.query("SELECT date FROM test.format_settings") - if result == expected: - break - - instance.query( - """ - CREATE TABLE test.view ( - id String, date DateTime - ) ENGINE = MergeTree ORDER BY id; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.format_settings; - """ - ) - - asyncio.run(nats_produce_messages(nats_cluster, "format_settings", [message])) - while True: - result = 
instance.query("SELECT date FROM test.view") - if result == expected: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.format_settings; - """ - ) - - assert result == expected - - -def test_nats_bad_args(nats_cluster): - instance.query_and_get_error( - """ - CREATE TABLE test.drop (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_secure = true, - nats_format = 'JSONEachRow'; - """ - ) - - -def test_nats_drop_mv(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mv', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(20): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - instance.query("DROP VIEW test.consumer") - messages = [] - for i in range(20, 40): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - messages = [] - for i in range(40, 50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - while True: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - instance.query("DROP VIEW test.consumer") - messages = [] - for i in range(50, 60): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - count = 0 - while True: - count = int(instance.query("SELECT count() FROM test.nats")) - if count: - break - - assert count > 0 - - -def test_nats_predefined_configuration(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS(nats1) """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - asyncio.run( - nats_produce_messages( - nats_cluster, "named", [json.dumps({"key": 1, "value": 2})] - ) - ) - while True: - result = instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if result == "1\t2\n": - break - - -def test_format_with_prefix_and_suffix(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'custom', - nats_format = 'CustomSeparated'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("custom") - await sub.unsubscribe(2) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = 
threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - "INSERT INTO test.nats select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - thread.join() - - assert ( - "".join(insert_messages) - == "\n0\t0\n\n\n10\t100\n\n" - ) - - -def test_max_rows_per_message(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'custom1', - nats_format = 'CustomSeparated', - nats_max_rows_per_message = 3, - format_custom_result_before_delimiter = '\n', - format_custom_result_after_delimiter = '\n'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - num_rows = 5 - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("custom1") - await sub.unsubscribe(2) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - thread.join() - - assert ( - "".join(insert_messages) - == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" - ) - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view") - assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" - - -def test_row_based_formats(nats_cluster): - num_rows = 10 - - for format_name in [ - "TSV", - "TSVWithNamesAndTypes", - "TSKV", - "CSV", - "CSVWithNamesAndTypes", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "Avro", - "RowBinary", - "RowBinaryWithNamesAndTypes", - "MsgPack", - ]: - print(format_name) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = '{format_name}', - nats_format = '{format_name}'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = 0 - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe(format_name) - await sub.unsubscribe(2) - async for msg in sub.messages: - nonlocal insert_messages - insert_messages += 1 - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - 
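-
-    # The sub_to_nats helpers in the tests above and below all use the nats-py
-    # auto-unsubscribe pattern: `await sub.unsubscribe(limit)` removes the
-    # subscription after `limit` deliveries, so `async for msg in sub.messages`
-    # ends once that many messages have arrived. A minimal standalone sketch of
-    # the same pattern (the connection object `nc` is assumed to come from
-    # nats_connect_ssl, as in the surrounding code):
-    #
-    #     async def collect_messages(nc, subject, limit):
-    #         received = []
-    #         sub = await nc.subscribe(subject)
-    #         await sub.unsubscribe(limit)      # auto-unsubscribe after `limit` messages
-    #         async for msg in sub.messages:    # iteration stops at that limit
-    #             received.append(msg.data.decode())
-    #         await nc.drain()
-    #         return received
-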
thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows})" - ) - - thread.join() - - assert insert_messages == 2 - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - - result = instance.query("SELECT * FROM test.view") - assert result == expected - - -def test_block_based_formats_1(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'PrettySpace', - nats_format = 'PrettySpace'; - """ - ) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("PrettySpace") - await sub.unsubscribe(3) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - attempt = 0 - while attempt < 100: - try: - instance.query( - "INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0;" - ) - break - except Exception: - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - attempt += 1 - thread.join() - - data = [] - for message in insert_messages: - splitted = message.split("\n") - assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" - assert splitted[1] == "" - assert splitted[-1] == "" - data += [line.split() for line in splitted[2:-1]] - - assert data == [ - ["0", "0"], - ["10", "100"], - ["20", "200"], - ["30", "300"], - ["40", "400"], - ] - - -def test_block_based_formats_2(nats_cluster): - num_rows = 100 - - for format_name in [ - "JSONColumns", - "Native", - "Arrow", - "Parquet", - "ORC", - "JSONCompactColumns", - ]: - print(format_name) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = '{format_name}', - nats_format = '{format_name}'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = 0 - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe(format_name) - await sub.unsubscribe(9) - async for msg in sub.messages: - nonlocal insert_messages - insert_messages += 1 - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0;" - ) - - thread.join() - - assert 
insert_messages == 9 - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view ORDER by key") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_nats/test_nats_json.reference b/tests/integration/test_storage_nats/test_nats_json.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_nats/test_nats_json.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 686eb1ea751..11729a5ab18 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -26,6 +26,10 @@ def started_cluster(): cluster.start() node1.query("CREATE DATABASE test") node2.query("CREATE DATABASE test") + # Wait for the PostgreSQL handler to start. + # cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. + node1.wait_for_log_line("PostgreSQL compatibility protocol") yield cluster finally: @@ -726,6 +730,22 @@ def test_auto_close_connection(started_cluster): assert count == 2 +def test_literal_escaping(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute(f"DROP TABLE IF EXISTS escaping") + cursor.execute(f"CREATE TABLE escaping(text varchar(255))") + node1.query( + "CREATE TABLE default.escaping (text String) ENGINE = PostgreSQL('postgres1:5432', 'postgres', 'escaping', 'postgres', 'mysecretpassword')" + ) + node1.query("SELECT * FROM escaping WHERE text = ''''") # ' -> '' + node1.query("SELECT * FROM escaping WHERE text = '\\''") # ' -> '' + node1.query("SELECT * FROM escaping WHERE text = '\\\\\\''") # \' -> \'' + node1.query("SELECT * FROM escaping WHERE text = '\\\\\\''") # \' -> \'' + node1.query("SELECT * FROM escaping WHERE text like '%a''a%'") # %a'a% -> %a''a% + node1.query("SELECT * FROM escaping WHERE text like '%a\\'a%'") # %a'a% -> %a''a% + cursor.execute(f"DROP TABLE escaping") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml b/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml deleted file mode 100644 index 27c7107ce5e..00000000000 --- a/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py deleted file mode 100644 index 66495700102..00000000000 --- 
a/tests/integration/test_storage_postgresql_replica/test.py +++ /dev/null @@ -1,780 +0,0 @@ -import pytest - -# FIXME Tests with MaterializedPostgresSQL are temporarily disabled -# https://github.com/ClickHouse/ClickHouse/issues/36898 -# https://github.com/ClickHouse/ClickHouse/issues/38677 -# https://github.com/ClickHouse/ClickHouse/pull/39272#issuecomment-1190087190 - -pytestmark = pytest.mark.skip - -import time -import psycopg2 -import os.path as p - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT -from helpers.test_tools import TSV - -import threading - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/log_conf.xml"], - with_postgres=True, - stay_alive=True, -) - -postgres_table_template = """ - CREATE TABLE IF NOT EXISTS {} ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ - -queries = [ - "INSERT INTO {} select i, i from generate_series(0, 10000) as t(i);", - "DELETE FROM {} WHERE (value*value) % 3 = 0;", - "UPDATE {} SET value = value + 125 WHERE key % 2 = 0;", - "UPDATE {} SET key=key+20000 WHERE key%2=0", - "INSERT INTO {} select i, i from generate_series(40000, 50000) as t(i);", - "DELETE FROM {} WHERE key % 10 = 0;", - "UPDATE {} SET value = value + 101 WHERE key % 2 = 1;", - "UPDATE {} SET key=key+80000 WHERE key%2=1", - "DELETE FROM {} WHERE value % 2 = 0;", - "UPDATE {} SET value = value + 2000 WHERE key % 5 = 0;", - "INSERT INTO {} select i, i from generate_series(200000, 250000) as t(i);", - "DELETE FROM {} WHERE value % 3 = 0;", - "UPDATE {} SET value = value * 2 WHERE key % 3 = 0;", - "UPDATE {} SET key=key+500000 WHERE key%2=1", - "INSERT INTO {} select i, i from generate_series(1000000, 1050000) as t(i);", - "DELETE FROM {} WHERE value % 9 = 2;", - "UPDATE {} SET key=key+10000000", - "UPDATE {} SET value = value + 2 WHERE key % 3 = 1;", - "DELETE FROM {} WHERE value%5 = 0;", -] - - -@pytest.mark.timeout(30) -def check_tables_are_synchronized( - table_name, order_by="key", postgres_database="postgres_database" -): - while True: - expected = instance.query( - "select * from {}.{} order by {};".format( - postgres_database, table_name, order_by - ) - ) - result = instance.query( - "select * from test.{} order by {};".format(table_name, order_by) - ) - if result == expected: - break - - assert result == expected - - -def get_postgres_conn( - ip, port, database=False, auto_commit=True, database_name="postgres_database" -): - if database == True: - conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format( - ip, port, database_name - ) - else: - conn_string = ( - "host={} port={} user='postgres' password='mysecretpassword'".format( - ip, port - ) - ) - - conn = psycopg2.connect(conn_string) - if auto_commit: - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - - -def create_postgres_db(cursor, name): - cursor.execute("CREATE DATABASE {}".format(name)) - - -def create_clickhouse_postgres_db(ip, port, name="postgres_database"): - instance.query( - """ - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')""".format( - name, ip, port, name - ) - ) - - -def create_materialized_table(ip, port, table_name="postgresql_replica"): - instance.query( - f""" - CREATE TABLE test.{table_name} (key Int64, value Int64) - ENGINE = MaterializedPostgreSQL( - '{ip}:{port}', 'postgres_database', 
'{table_name}', 'postgres', 'mysecretpassword') - PRIMARY KEY key; """ - ) - - -def create_postgres_table(cursor, table_name, replica_identity_full=False): - cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) - cursor.execute(postgres_table_template.format(table_name)) - if replica_identity_full: - cursor.execute("ALTER TABLE {} REPLICA IDENTITY FULL;".format(table_name)) - - -def postgresql_replica_check_result( - result, check=False, ref_file="test_postgresql_replica.reference" -): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - conn = get_postgres_conn(ip=cluster.postgres_ip, port=cluster.postgres_port) - cursor = conn.cursor() - create_postgres_db(cursor, "postgres_database") - create_clickhouse_postgres_db( - ip=cluster.postgres_ip, port=cluster.postgres_port - ) - - instance.query("CREATE DATABASE test") - yield cluster - - finally: - cluster.shutdown() - - -@pytest.mark.timeout(320) -def test_initial_load_from_snapshot(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.2) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_no_connection_at_startup(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - time.sleep(3) - - instance.query("DETACH TABLE test.postgresql_replica") - started_cluster.pause_container("postgres1") - - instance.query("ATTACH TABLE test.postgresql_replica") - time.sleep(3) - started_cluster.unpause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) == 0: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_detach_attach_is_ok(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - 
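-
-    # The removed tests in this file wait for replication with open-ended
-    # `while` loops plus pytest timeouts. A bounded helper such as the following
-    # sketch (hypothetical, not part of the original file; only `instance.query`
-    # from the surrounding code is assumed, and `time` is already imported at the
-    # top of this file) would make the timeout explicit:
-    #
-    #     def wait_for_row_count(instance, query, expected, timeout_sec=60, poll_sec=0.2):
-    #         # Poll `query` until it returns `expected` or the deadline passes.
-    #         deadline = time.monotonic() + timeout_sec
-    #         while time.monotonic() < deadline:
-    #             if int(instance.query(query)) == expected:
-    #                 return
-    #             time.sleep(poll_sec)
-    #         raise AssertionError(f"{query!r} did not reach {expected} within {timeout_sec}s")
-    #
-    # Example: wait_for_row_count(instance, "SELECT count() FROM test.postgresql_replica;", 50)
-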
create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) == 0: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - postgresql_replica_check_result(result, True) - - instance.query("DETACH TABLE test.postgresql_replica") - instance.query("ATTACH TABLE test.postgresql_replica") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_replicating_insert_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 10: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 10 + number, 10 + number from numbers(10)" - ) - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 20 + number, 20 + number from numbers(10)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 30: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 30 + number, 30 + number from numbers(10)" - ) - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 40 + number, 40 + number from numbers(10)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_replicating_delete_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - 
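-
-    # postgresql_replica_check_result (defined above) compares query output
-    # against the 50-row reference file (pairs 0..49) that this patch also
-    # deletes. For reference, the same expectation could be produced inline -
-    # a sketch, assuming the tab-separated layout that the TSV comparison expects:
-    #
-    #     expected_reference = "".join(f"{i}\t{i}\n" for i in range(50))  # "0\t0\n" .. "49\t49\n"
-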
create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.2) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DELETE FROM postgresql_replica WHERE key > 49;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_replicating_update_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("UPDATE postgresql_replica SET value = value - 10;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_resume_from_written_version(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query("DETACH TABLE test.postgresql_replica") - - cursor.execute("DELETE FROM postgresql_replica WHERE key > 49;") - cursor.execute("UPDATE postgresql_replica SET value = value - 10;") - - 
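-
-    # Note: this file imports assert_eq_with_retry from helpers.test_tools but
-    # never uses it. The hand-rolled polling loops in this file could lean on it
-    # instead - a sketch, assuming the helper's (instance, query, expectation)
-    # calling convention; e.g. after the ATTACH below, once replication has
-    # caught up the table should hold the remaining 50 rows:
-    #
-    #     assert_eq_with_retry(
-    #         instance, "SELECT count() FROM test.postgresql_replica", "50"
-    #     )
-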
instance.query("ATTACH TABLE test.postgresql_replica") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_many_replication_messages(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100000: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print("SYNC OK") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000, 100000)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 200000: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print("INSERT OK") - - result = instance.query("SELECT key FROM test.postgresql_replica ORDER BY key;") - expected = instance.query("SELECT number from numbers(200000)") - assert result == expected - - cursor.execute("UPDATE postgresql_replica SET value = key + 1 WHERE key < 100000;") - - result = instance.query( - "SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;" - ) - expected = instance.query("SELECT number from numbers(100000)") - - while result != expected: - time.sleep(1) - result = instance.query( - "SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;" - ) - print("UPDATE OK") - - cursor.execute("DELETE FROM postgresql_replica WHERE key % 2 = 1;") - cursor.execute("DELETE FROM postgresql_replica WHERE key != value;") - - result = instance.query( - "SELECT count() FROM (SELECT * FROM test.postgresql_replica);" - ) - while int(result) != 50000: - time.sleep(1) - result = instance.query( - "SELECT count() FROM (SELECT * FROM test.postgresql_replica);" - ) - print("DELETE OK") - - cursor.execute("DROP TABLE postgresql_replica;") - - -@pytest.mark.timeout(320) -def test_connection_loss(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - i = 50 - while i < 100000: - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format( - i - ) - ) - i += 10000 - - started_cluster.pause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print(int(result)) - time.sleep(6) - - 
started_cluster.unpause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) < 100050: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DROP TABLE postgresql_replica;") - assert int(result) == 100050 - - -@pytest.mark.timeout(320) -def test_clickhouse_restart(started_cluster): - pytest.skip("Temporary disabled (FIXME)") - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - i = 50 - while i < 100000: - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format( - i - ) - ) - i += 10000 - - instance.restart_clickhouse() - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) < 100050: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DROP TABLE postgresql_replica;") - print(result) - assert int(result) == 100050 - - -def test_rename_table(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 25: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "RENAME TABLE test.postgresql_replica TO test.postgresql_replica_renamed" - ) - assert ( - int(instance.query("SELECT count() FROM test.postgresql_replica_renamed;")) - == 25 - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25, 25)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica_renamed;") - while int(result) != 50: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica_renamed;") - - result = instance.query( - "SELECT * FROM test.postgresql_replica_renamed ORDER BY key;" - ) - postgresql_replica_check_result(result, True) - cursor.execute("DROP TABLE postgresql_replica;") - instance.query("DROP TABLE IF EXISTS test.postgresql_replica_renamed") - - -def test_virtual_columns(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)" - ) - result = instance.query("SELECT count() FROM 
test.postgresql_replica;") - while int(result) != 10: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. - result = instance.query( - "SELECT key, value, _sign, _version FROM test.postgresql_replica;" - ) - print(result) - cursor.execute("DROP TABLE postgresql_replica;") - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -def test_abrupt_connection_loss_while_heavy_replication(started_cluster): - instance.query("DROP DATABASE IF EXISTS test_database") - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - table_name = "postgresql_replica" - create_postgres_table(cursor, table_name) - - instance.query(f"DROP TABLE IF EXISTS test.{table_name}") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - for i in range(len(queries)): - query = queries[i].format(table_name) - cursor.execute(query) - print("query {}".format(query.format(table_name))) - - started_cluster.pause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica") - print(result) # Just debug - - started_cluster.unpause_container("postgres1") - - check_tables_are_synchronized("postgresql_replica") - - result = instance.query("SELECT count() FROM test.postgresql_replica") - print(result) # Just debug - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -def test_abrupt_server_restart_while_heavy_replication(started_cluster): - # FIXME (kssenii) temporary disabled - if instance.is_built_with_sanitizer(): - pytest.skip("Temporary disabled (FIXME)") - - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - table_name = "postgresql_replica_697" - create_postgres_table(cursor, table_name) - - instance.query(f"INSERT INTO postgres_database.{table_name} SELECT -1, 1") - instance.query(f"DROP TABLE IF EXISTS test.{table_name} SYNC") - create_materialized_table( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - table_name=table_name, - ) - - n = 1 - while int(instance.query(f"select count() from test.{table_name}")) != 1: - sleep(1) - n += 1 - if n > 10: - break - - for query in queries: - cursor.execute(query.format(table_name)) - print("query {}".format(query.format(table_name))) - - instance.restart_clickhouse() - - result = instance.query(f"SELECT count() FROM test.{table_name}") - print(result) # Just debug - - check_tables_are_synchronized(table_name) - - result = instance.query(f"SELECT count() FROM test.{table_name}") - print(result) # Just debug - instance.query(f"DROP TABLE test.{table_name} SYNC") - - -def test_drop_table_immediately(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - instance.query("DROP TABLE test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, 
port=started_cluster.postgres_port - ) - check_tables_are_synchronized("postgresql_replica") - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference b/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 751279f5e5a..80d2050b394 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -511,69 +511,6 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): rabbitmq_check_result(result2, True) -@pytest.mark.skip(reason="clichouse_path with rabbitmq.proto fails to be exported") -def test_rabbitmq_protobuf(rabbitmq_cluster): - instance.query( - """ - CREATE TABLE test.rabbitmq (key UInt64, value String) - ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_exchange_name = 'pb', - rabbitmq_format = 'Protobuf', - rabbitmq_flush_interval_ms=1000, - rabbitmq_max_block_size=100, - rabbitmq_schema = 'rabbitmq.proto:KeyValueProto'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.rabbitmq; - """ - ) - - credentials = pika.PlainCredentials("root", "clickhouse") - parameters = pika.ConnectionParameters( - rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials - ) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - - data = "" - for i in range(0, 20): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - data = "" - for i in range(20, 21): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - data = "" - for i in range(21, 50): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - - connection.close() - - result = "" - while True: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if rabbitmq_check_result(result): - break - - rabbitmq_check_result(result, True) - - def test_rabbitmq_big_message(rabbitmq_cluster): # Create batchs of messages of size ~100Kb rabbitmq_messages = 1000 diff --git a/tests/integration/test_storage_s3/configs/users.xml 
b/tests/integration/test_storage_s3/configs/users.xml index 4b6ba057ecb..dcc4c4b3092 100644 --- a/tests/integration/test_storage_s3/configs/users.xml +++ b/tests/integration/test_storage_s3/configs/users.xml @@ -3,7 +3,7 @@ default - 1 + 1 diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 45437fefa79..6b05379c712 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -944,7 +944,7 @@ def test_predefined_connection_configuration(started_cluster): f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON s3_conf1" in error ) error = instance.query_and_get_error( @@ -952,7 +952,7 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON s3_conf1" in error ) @@ -973,12 +973,12 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) instance = started_cluster.instances["dummy"] # has named collection access @@ -1018,11 +1018,11 @@ def test_url_reconnect_in_the_middle(started_cluster): def select(): global result result = instance.query( - f"""select sum(cityHash64(x)) from (select toUInt64(id) + sleep(0.1) as x from + f"""select count(), sum(cityHash64(x)) from (select toUInt64(id) + sleep(0.1) as x from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'TSV', '{table_format}') settings http_max_tries = 10, http_retry_max_backoff_ms=2000, http_send_timeout=1, http_receive_timeout=1)""" ) - assert int(result) == 3914219105369203805 + assert result == "1000000\t3914219105369203805\n" thread = threading.Thread(target=select) thread.start() @@ -1035,7 +1035,7 @@ def test_url_reconnect_in_the_middle(started_cluster): thread.join() - assert int(result) == 3914219105369203805 + assert result == "1000000\t3914219105369203805\n" def test_seekable_formats(started_cluster): @@ -1408,16 +1408,16 @@ def test_select_columns(started_cluster): instance.query("SYSTEM FLUSH LOGS") result1 = instance.query( - f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" ) instance.query(f"SELECT * FROM {name}") instance.query("SYSTEM FLUSH LOGS") result2 = instance.query( - f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" ) - assert int(result1) * 3 <= 
int(result2) + assert round(int(result2) / int(result1)) == 3 def test_insert_select_schema_inference(started_cluster): @@ -1476,12 +1476,16 @@ def test_wrong_format_usage(started_cluster): assert "Not a Parquet file" in result -def check_profile_event_for_query(instance, query, profile_event, amount): +def check_profile_event_for_query( + instance, file, storage_name, started_cluster, bucket, profile_event, amount +): instance.query("system flush logs") - query = query.replace("'", "\\'") + query_pattern = f"{storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file}'".replace( + "'", "\\'" + ) res = int( instance.query( - f"select ProfileEvents['{profile_event}'] from system.query_log where query='{query}' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" + f"select ProfileEvents['{profile_event}'] from system.query_log where query like '%{query_pattern}%' and query not like '%ProfileEvents%' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1" ) ) @@ -1489,30 +1493,68 @@ def check_profile_event_for_query(instance, query, profile_event, amount): def check_cache_misses(instance, file, storage_name, started_cluster, bucket, amount=1): - query = f"desc {storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file}')" - check_profile_event_for_query(instance, query, "SchemaInferenceCacheMisses", amount) + check_profile_event_for_query( + instance, + file, + storage_name, + started_cluster, + bucket, + "SchemaInferenceCacheMisses", + amount, + ) def check_cache_hits(instance, file, storage_name, started_cluster, bucket, amount=1): - query = f"desc {storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file}')" - check_profile_event_for_query(instance, query, "SchemaInferenceCacheHits", amount) + check_profile_event_for_query( + instance, + file, + storage_name, + started_cluster, + bucket, + "SchemaInferenceCacheHits", + amount, + ) def check_cache_invalidations( instance, file, storage_name, started_cluster, bucket, amount=1 ): - query = f"desc {storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file}')" check_profile_event_for_query( - instance, query, "SchemaInferenceCacheInvalidations", amount + instance, + file, + storage_name, + started_cluster, + bucket, + "SchemaInferenceCacheInvalidations", + amount, ) def check_cache_evictions( instance, file, storage_name, started_cluster, bucket, amount=1 ): - query = f"desc {storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file}')" check_profile_event_for_query( - instance, query, "SchemaInferenceCacheEvictions", amount + instance, + file, + storage_name, + started_cluster, + bucket, + "SchemaInferenceCacheEvictions", + amount, + ) + + +def check_cahce_num_rows_hits( + instance, file, storage_name, started_cluster, bucket, amount=1 +): + check_profile_event_for_query( + instance, + file, + storage_name, + started_cluster, + bucket, + "SchemaInferenceCacheNumRowsHits", + amount, ) @@ -1521,6 +1563,11 @@ def run_describe_query(instance, file, storage_name, started_cluster, bucket): instance.query(query) +def run_count_query(instance, file, storage_name, started_cluster, bucket): + query = f"select count() from {storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file}', auto, 'x UInt64')" + return instance.query(query) + + def check_cache(instance, expected_files): sources = 
instance.query("select source from system.schema_inference_cache") assert sorted(map(lambda x: x.strip().split("/")[-1], sources.split())) == sorted( @@ -1673,6 +1720,112 @@ def test_schema_inference_cache(started_cluster): run_describe_query(instance, files, storage_name, started_cluster, bucket) check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) + instance.query("system drop schema cache") + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache0.csv') select * from numbers(100) settings s3_truncate_on_insert=1" + ) + time.sleep(1) + + res = run_count_query( + instance, "test_cache0.csv", storage_name, started_cluster, bucket + ) + + assert int(res) == 100 + + check_cache(instance, ["test_cache0.csv"]) + check_cache_misses( + instance, "test_cache0.csv", storage_name, started_cluster, bucket + ) + + res = run_count_query( + instance, "test_cache0.csv", storage_name, started_cluster, bucket + ) + assert int(res) == 100 + + check_cache_hits( + instance, "test_cache0.csv", storage_name, started_cluster, bucket + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache0.csv') select * from numbers(200) settings s3_truncate_on_insert=1" + ) + time.sleep(1) + + res = run_count_query( + instance, "test_cache0.csv", storage_name, started_cluster, bucket + ) + + assert int(res) == 200 + + check_cache_invalidations( + instance, "test_cache0.csv", storage_name, started_cluster, bucket + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache1.csv') select * from numbers(100) settings s3_truncate_on_insert=1" + ) + time.sleep(1) + + res = run_count_query( + instance, "test_cache1.csv", storage_name, started_cluster, bucket + ) + + assert int(res) == 100 + check_cache(instance, ["test_cache0.csv", "test_cache1.csv"]) + check_cache_misses( + instance, "test_cache1.csv", storage_name, started_cluster, bucket + ) + + res = run_count_query( + instance, "test_cache1.csv", storage_name, started_cluster, bucket + ) + assert int(res) == 100 + check_cache_hits( + instance, "test_cache1.csv", storage_name, started_cluster, bucket + ) + + res = run_count_query( + instance, "test_cache{0,1}.csv", storage_name, started_cluster, bucket + ) + assert int(res) == 300 + check_cache_hits( + instance, "test_cache{0,1}.csv", storage_name, started_cluster, bucket, 2 + ) + + instance.query(f"system drop schema cache for {storage_name}") + check_cache(instance, []) + + res = run_count_query( + instance, "test_cache{0,1}.csv", storage_name, started_cluster, bucket + ) + assert int(res) == 300 + check_cache_misses( + instance, "test_cache{0,1}.csv", storage_name, started_cluster, bucket, 2 + ) + + instance.query(f"system drop schema cache for {storage_name}") + check_cache(instance, []) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache.parquet') select * from numbers(100) settings s3_truncate_on_insert=1" + ) + time.sleep(1) + + res = instance.query( + f"select count() from {storage_name}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache.parquet')" + ) + assert int(res) == 100 + check_cache_misses( + instance, "test_cache.parquet", storage_name, started_cluster, bucket + ) + check_cache_hits( + instance, "test_cache.parquet", storage_name, 
started_cluster, bucket + ) + check_cahce_num_rows_hits( + instance, "test_cache.parquet", storage_name, started_cluster, bucket + ) + test("s3") test("url") @@ -1827,3 +1980,95 @@ def test_skip_empty_files(started_cluster): ) assert len(res.strip()) == 0 + + +def test_read_subcolumns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\t/root/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res == "42\t/root/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) + + +def test_filtering_by_file_or_path(started_cluster): + bucket = 
started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter1.tsv', auto, 'x UInt64') select 1" + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter2.tsv', auto, 'x UInt64') select 2" + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter3.tsv', auto, 'x UInt64') select 3" + ) + + instance.query( + f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter*.tsv') where _file = 'test_filter1.tsv'" + ) + + instance.query("SYSTEM FLUSH LOGS") + + result = instance.query( + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%select%s3%test_filter%' AND type='QueryFinish'" + ) + + assert int(result) == 1 diff --git a/tests/integration/test_storage_s3_queue/__init__.py b/tests/integration/test_storage_s3_queue/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml new file mode 100644 index 00000000000..7dac6d9fbb5 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml @@ -0,0 +1,11 @@ + + + + http://resolver:8080 +
Authorization: Bearer TOKEN
+
+ + http://resolver:8080/root-with-auth/restricteddirectory/ + +
+
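Note: the S3 tests above share one pattern whenever they assert on a ProfileEvents counter (ReadBufferFromS3Bytes, the SchemaInferenceCache* events, EngineFileLikeReadFiles): run the query, SYSTEM FLUSH LOGS, then read the counter from the most recent matching row in system.query_log. A minimal sketch of that pattern, assuming a helpers.cluster.ClickHouseInstance-like object; the helper name is illustrative and not part of this patch:

def last_profile_event(instance, query_pattern, event):
    # query_log is flushed asynchronously, so force a flush first.
    instance.query("SYSTEM FLUSH LOGS")
    # Take the newest finished query matching the pattern, excluding the
    # monitoring query itself, and read a single ProfileEvents counter.
    return int(
        instance.query(
            f"SELECT ProfileEvents['{event}'] FROM system.query_log "
            f"WHERE query LIKE '%{query_pattern}%' AND query NOT LIKE '%ProfileEvents%' "
            "AND type = 'QueryFinish' "
            "ORDER BY query_start_time_microseconds DESC LIMIT 1"
        )
    )

For example, last_profile_event(instance, "test_filter", "EngineFileLikeReadFiles") would reproduce the kind of check done at the end of test_filtering_by_file_or_path above.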
diff --git a/tests/integration/test_storage_s3_queue/configs/named_collections.xml b/tests/integration/test_storage_s3_queue/configs/named_collections.xml new file mode 100644 index 00000000000..64674e2a3e3 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/named_collections.xml @@ -0,0 +1,43 @@ + + + + http://minio1:9001/root/test_table + minio + minio123 + + + http://minio1:9001/root/test_parquet + minio + minio123 + + + http://minio1:9001/root/test_parquet_gz + minio + minio123 + + + http://minio1:9001/root/test_orc + minio + minio123 + + + http://minio1:9001/root/test_native + minio + minio123 + + + http://minio1:9001/root/test.arrow + minio + minio123 + + + http://minio1:9001/root/test.parquet + minio + minio123 + + + http://minio1:9001/root/test_cache4.jsonl + true + + + diff --git a/tests/integration/test_storage_nats/configs/users.xml b/tests/integration/test_storage_s3_queue/configs/users.xml similarity index 72% rename from tests/integration/test_storage_nats/configs/users.xml rename to tests/integration/test_storage_s3_queue/configs/users.xml index 2cef0a6de3c..3118ec43654 100644 --- a/tests/integration/test_storage_nats/configs/users.xml +++ b/tests/integration/test_storage_s3_queue/configs/users.xml @@ -2,6 +2,7 @@ 1 + 1 diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py new file mode 100644 index 00000000000..c11bbd43dc6 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/test.py @@ -0,0 +1,879 @@ +import io +import logging +import os +import random +import time + +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +import json + +""" +export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-server +export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-client +export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-odbc-bridge +export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/home/sergey/vkr/ClickHouse/programs/server + +""" + + +def prepare_s3_bucket(started_cluster): + # Allows read-write access for bucket without authorization. 
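+    # The policy grants anonymous access ("Principal": "*"): GetBucketLocation and ListBucket on the root bucket, and Get/Put/DeleteObject on its objects.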
+ bucket_read_write_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetBucketLocation", + "Resource": "arn:aws:s3:::root", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:ListBucket", + "Resource": "arn:aws:s3:::root", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::root/*", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::root/*", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:DeleteObject", + "Resource": "arn:aws:s3:::root/*", + }, + ], + } + + minio_client = started_cluster.minio_client + minio_client.set_bucket_policy( + started_cluster.minio_bucket, json.dumps(bucket_read_write_policy) + ) + + started_cluster.minio_restricted_bucket = "{}-with-auth".format( + started_cluster.minio_bucket + ) + if minio_client.bucket_exists(started_cluster.minio_restricted_bucket): + minio_client.remove_bucket(started_cluster.minio_restricted_bucket) + + minio_client.make_bucket(started_cluster.minio_restricted_bucket) + + +@pytest.fixture(autouse=True) +def s3_queue_setup_teardown(started_cluster): + instance = started_cluster.instances["instance"] + instance_2 = started_cluster.instances["instance2"] + + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + + minio = started_cluster.minio_client + objects = list( + minio.list_objects(started_cluster.minio_restricted_bucket, recursive=True) + ) + for obj in objects: + minio.remove_object(started_cluster.minio_restricted_bucket, obj.object_name) + yield # run test + + +MINIO_INTERNAL_PORT = 9001 +AVAILABLE_MODES = ["unordered", "ordered"] +AUTH = "'minio','minio123'," + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +def put_s3_file_content(started_cluster, bucket, filename, data): + buf = io.BytesIO(data) + started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) + + +def generate_random_files( + count, prefix, cluster, bucket, column_num=3, row_num=10, start_ind=0 +): + total_values = [] + to_generate = [ + (f"{prefix}/test_{i}.csv", i) for i in range(start_ind, start_ind + count) + ] + to_generate.sort(key=lambda x: x[0]) + + for filename, i in to_generate: + rand_values = [ + [random.randint(0, 50) for _ in range(column_num)] for _ in range(row_num) + ] + total_values += rand_values + values_csv = ( + "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" + ).encode() + put_s3_file_content(cluster, bucket, filename, values_csv) + return total_values + + +# Returns content of given S3 file as string. 
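+# (pass decode=False to get the raw bytes instead of a str)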
+def get_s3_file_content(started_cluster, bucket, filename, decode=True): + # type: (ClickHouseCluster, str, str, bool) -> str + + data = started_cluster.minio_client.get_object(bucket, filename) + data_str = b"" + for chunk in data.stream(): + data_str += chunk + if decode: + return data_str.decode() + return data_str + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "instance", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=["configs/defaultS3.xml", "configs/named_collections.xml"], + ) + cluster.add_instance( + "instance2", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=["configs/defaultS3.xml", "configs/named_collections.xml"], + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + prepare_s3_bucket(cluster) + yield cluster + finally: + cluster.shutdown() + + +def run_query(instance, query, stdin=None, settings=None): + # type: (ClickHouseInstance, str, object, dict) -> str + + logging.info("Running query '{}'...".format(query)) + result = instance.query(query, stdin=stdin, settings=settings) + logging.info("Query finished") + + return result + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_delete_after_processing(started_cluster, mode): + prefix = "delete" + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + total_values = generate_random_files(5, prefix, started_cluster, bucket) + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_delete_{mode}', + s3queue_loading_retries = 3, + after_processing='delete'; + """ + ) + + get_query = f"SELECT * FROM test.s3_queue ORDER BY column1, column2, column3" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == sorted(total_values, key=lambda x: (x[0], x[1], x[2])) + minio = started_cluster.minio_client + objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) + assert len(objects) == 0 + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_failed_retry(started_cluster, mode): + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + values = [ + ["failed", 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"test.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_failed_retry_{mode}', + s3queue_loading_retries = 3; + """ + ) + + # first try + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # second try + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # 
upload correct file + values = [ + [1, 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_direct_select_file(started_cluster, mode): + auth = "'minio','minio123'," + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + values = [ + [12549, 2463, 19893], + [64021, 38652, 66703], + [81611, 39650, 83516], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"test.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + instance.query( + """ + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_2; + DROP TABLE IF EXISTS test.s3_queue_3; + """ + ) + + instance.query( + f""" + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_{mode}' + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + instance.query( + f""" + CREATE TABLE test.s3_queue_2 ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_{mode}' + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # New table with same zookeeper path + get_query = f"SELECT * FROM test.s3_queue_2" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # New table with different zookeeper path + instance.query( + f""" + CREATE TABLE test.s3_queue_3 ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path='/clickhouse/select_{mode}_2' + """ + ) + get_query = f"SELECT * FROM test.s3_queue_3" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + values = [ + [1, 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"t.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + get_query = f"SELECT * FROM test.s3_queue_3" + if mode == "unordered": + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == values + elif mode == "ordered": + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == [] + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_direct_select_multiple_files(started_cluster, mode): + prefix = f"multiple_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + instance.query("drop table if exists 
test.s3_queue") + instance.query( + f""" + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_multiple_{mode}' + """ + ) + + for i in range(5): + rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)] + + values_csv = ( + "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" + ).encode() + filename = f"{prefix}/test_{i}.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == rand_values + + total_values = generate_random_files( + 4, prefix, started_cluster, bucket, start_ind=5 + ) + get_query = f"SELECT * FROM test.s3_queue" + assert { + tuple(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + } == set([tuple(i) for i in total_values]) + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_streaming_to_view_(started_cluster, mode): + prefix = f"streaming_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + total_values = generate_random_files(10, prefix, started_cluster, bucket) + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/view_{mode}'; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + """ + ) + expected_values = set([tuple(i) for i in total_values]) + for i in range(10): + get_query = f"SELECT * FROM test.persistent_s3_queue_mv" + + selected_values = { + tuple(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + } + if selected_values != expected_values: + time.sleep(1) + else: + break + + assert selected_values == expected_values + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_streaming_to_many_views(started_cluster, mode): + prefix = f"streaming_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + retry_cnt = 10 + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue_persistent_2; + DROP TABLE IF EXISTS test.s3_queue_persistent_3; + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_2; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_3; + + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_2 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_3 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue ({table_format}) + 
ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/multiple_view_{mode}'; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_2 TO test.s3_queue_persistent_2 AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_3 TO test.s3_queue_persistent_3 AS + SELECT + * + FROM test.s3_queue; + """ + ) + total_values = generate_random_files(5, prefix, started_cluster, bucket) + expected_values = set([tuple(i) for i in total_values]) + + for i in range(retry_cnt): + retry = False + for get_query in [ + f"SELECT * FROM test.s3_queue_persistent", + f"SELECT * FROM test.s3_queue_persistent_2", + f"SELECT * FROM test.s3_queue_persistent_3", + ]: + selected_values = { + tuple(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + } + if i == retry_cnt - 1: + assert selected_values == expected_values + if selected_values != expected_values: + retry = True + break + if retry: + time.sleep(1) + else: + break + + +def test_multiple_tables_meta_mismatch(started_cluster): + prefix = f"test_meta" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + # check mode + failed = False + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert "Existing table metadata in ZooKeeper differs in engine mode" in str(e) + failed = True + assert failed is True + + # check columns + table_format_copy = table_format + ", column4 UInt32" + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format_copy}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert ( + "Table columns structure in ZooKeeper is different from local table structure" + in str(e) + ) + failed = True + + assert failed is True + + # check format + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'TSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert "Existing table metadata in ZooKeeper differs in format name" in str(e) + failed = True + assert failed is True + + # create working engine + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = 
'/clickhouse/test_meta'; + """ + ) + + +def test_max_set_age(started_cluster): + files_to_generate = 10 + max_age = 1 + prefix = f"test_multiple" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_set_age', + s3queue_tracked_files_limit = 10, + s3queue_tracked_file_ttl_sec = {max_age}; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + time.sleep(max_age + 1) + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_multiple_tables_streaming_sync(started_cluster, mode): + files_to_generate = 300 + poll_size = 30 + prefix = f"test_multiple_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_copy; + DROP TABLE IF EXISTS test.s3_queue_copy_2; + + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue_persistent_copy; + DROP TABLE IF EXISTS test.s3_queue_persistent_copy_2; + + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_copy; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_copy_2; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_copy_2 ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_copy ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_copy_2 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_copy TO test.s3_queue_persistent_copy AS + SELECT + * + FROM test.s3_queue_copy; + + CREATE 
MATERIALIZED VIEW test.persistent_s3_queue_mv_copy_2 TO test.s3_queue_persistent_copy_2 AS + SELECT + * + FROM test.s3_queue_copy_2; + """ + ) + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + + def get_count(table_name): + return int(run_query(instance, f"SELECT count() FROM {table_name}")) + + for _ in range(100): + if ( + get_count("test.s3_queue_persistent") + + get_count("test.s3_queue_persistent_copy") + + get_count("test.s3_queue_persistent_copy_2") + ) == files_to_generate: + break + time.sleep(1) + + get_query = f"SELECT * FROM test.s3_queue_persistent" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + get_query_copy = f"SELECT * FROM test.s3_queue_persistent_copy" + res2 = [ + list(map(int, l.split())) + for l in run_query(instance, get_query_copy).splitlines() + ] + get_query_copy_2 = f"SELECT * FROM test.s3_queue_persistent_copy_2" + res3 = [ + list(map(int, l.split())) + for l in run_query(instance, get_query_copy_2).splitlines() + ] + assert {tuple(v) for v in res1 + res2 + res3} == set( + [tuple(i) for i in total_values] + ) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count("test.s3_queue_persistent") + + get_count("test.s3_queue_persistent_copy") + + get_count("test.s3_queue_persistent_copy_2") + ) == files_to_generate + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): + files_to_generate = 100 + poll_size = 2 + prefix = f"test_multiple_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + instance_2 = started_cluster.instances["instance2"] + + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + for inst in [instance, instance_2]: + inst.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + """ + ) + + for inst in [instance, instance_2]: + inst.query( + f""" + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + + def get_count(node, table_name): + return int(run_query(node, f"SELECT count() FROM {table_name}")) + + for _ in range(150): + if ( + get_count(instance, "test.s3_queue_persistent") + + get_count(instance_2, "test.s3_queue_persistent") + ) == files_to_generate: + break + time.sleep(1) + + get_query = f"SELECT * FROM test.s3_queue_persistent" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + res2 = [ + list(map(int, l.split())) for l in run_query(instance_2, get_query).splitlines() + ] + + assert len(res1) + len(res2) == files_to_generate + + # Checking that all engines have made progress + assert len(res1) > 0 + assert len(res2) > 0 + + assert {tuple(v) for v in res1 + res2} == set([tuple(i) for i in total_values]) + + # Checking that all 
files were processed only once + time.sleep(10) + assert ( + get_count(instance, "test.s3_queue_persistent") + + get_count(instance_2, "test.s3_queue_persistent") + ) == files_to_generate + + +def test_max_set_size(started_cluster): + files_to_generate = 10 + prefix = f"test_multiple" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_set_size', + s3queue_tracked_files_limit = {files_to_generate - 1}; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, start_ind=0, row_num=1 + ) + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == [total_values[0]] + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == [total_values[1]] diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index 7f359078967..7ff7a871413 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -68,13 +68,13 @@ def test_url_cluster_with_named_collection(): def test_table_function_url_access_rights(): node1.query("CREATE USER OR REPLACE u1") - expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + expected_error = "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*" assert expected_error in node1.query_and_get_error( f"SELECT * FROM url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", user="u1", ) - expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + expected_error = "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*" assert expected_error in node1.query_and_get_error( f"SELECT * FROM url('http://nginx:80/test_1', 'TSV')", user="u1" ) @@ -89,7 +89,7 @@ def test_table_function_url_access_rights(): user="u1", ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) - expected_error = "necessary to have grant URL ON *.*" + expected_error = "necessary to have the grant URL ON *.*" assert expected_error in node1.query_and_get_error( f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV')", user="u1" ) diff --git a/tests/integration/test_storage_url_with_proxy/__init__.py b/tests/integration/test_storage_url_with_proxy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_url_with_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_storage_url_with_proxy/configs/config.d/proxy_list.xml new file mode 100644 index 00000000000..ff207e7166c --- /dev/null +++ b/tests/integration/test_storage_url_with_proxy/configs/config.d/proxy_list.xml @@ -0,0 +1,7 @@ + + + + http://proxy1 + + + \ No newline at end of file diff --git a/tests/integration/test_storage_url_with_proxy/test.py 
b/tests/integration/test_storage_url_with_proxy/test.py new file mode 100644 index 00000000000..107aa426836 --- /dev/null +++ b/tests/integration/test_storage_url_with_proxy/test.py @@ -0,0 +1,84 @@ +import logging +import time +from datetime import datetime +import hmac +import hashlib +import base64 + +import pytest +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + + cluster.add_instance( + "proxy_list_node", + main_configs=["configs/config.d/proxy_list.xml"], + with_minio=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def check_proxy_logs(cluster, proxy_instance, http_methods): + minio_ip = cluster.get_instance_ip("minio1") + for i in range(10): + logs = cluster.get_container_logs(proxy_instance) + # Check with retry that all possible interactions with Minio are present + for http_method in http_methods: + method_with_domain = http_method + " http://minio1" + method_with_ip = http_method + f" http://{minio_ip}" + + logging.info(f"Method with ip: {method_with_ip}") + + has_get_minio_logs = ( + logs.find(method_with_domain) >= 0 or logs.find(method_with_ip) >= 0 + ) + if has_get_minio_logs: + return + time.sleep(1) + else: + assert False, "http method not found in logs" + + +def test_s3_with_proxy_list(cluster): + node = cluster.instances["proxy_list_node"] + + # insert into function url uses POST and minio expects PUT + node.query( + """ + INSERT INTO FUNCTION + s3('http://minio1:9001/root/data/ch-proxy-test/test.csv', 'minio', 'minio123', 'CSV', 'key String, value String') + VALUES ('color','red'),('size','10') + """ + ) + + content_type = "application/zstd" + date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000") + resource = "/root/data/ch-proxy-test/test.csv" + get_sig_string = f"GET\n\n{content_type}\n{date}\n{resource}" + password = "minio123" + + get_digest = hmac.new( + password.encode("utf-8"), get_sig_string.encode("utf-8"), hashlib.sha1 + ).digest() + get_signature = base64.b64encode(get_digest).decode("utf-8") + assert ( + node.query( + "SELECT * FROM url('http://minio1:9001/root/data/ch-proxy-test/test.csv', 'CSV', 'a String, b String'," + f"headers('Host'='minio1', 'Date'= '{date}', 'Content-Type'='{content_type}'," + f"'Authorization'='AWS minio:{get_signature}')) FORMAT Values" + ) + == "('color','red'),('size','10')" + ) + + check_proxy_logs(cluster, "proxy1", ["GET"]) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index d9ab76d2d61..084d342d736 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -2,11 +2,16 @@ # pylint: disable=unused-argument # pylint: disable=redefined-outer-name +import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node_default") +node = cluster.add_instance( + "node_default", + stay_alive=True, +) system_logs = [ # disabled by default @@ -40,7 +45,11 @@ def test_system_logs(flush_logs, table, exists): if exists: node.query(q) else: - assert "Table {} doesn't exist".format(table) in node.query_and_get_error(q) + response = node.query_and_get_error(q) + assert ( + "Table {} does not exist".format(table) in response + or "Unknown table expression identifier 
'{}'".format(table) in response + ) # Logic is tricky, let's check that there is no hang in case of message queue @@ -64,3 +73,95 @@ def test_system_suspend(): node.query("SYSTEM SUSPEND FOR 1 SECOND;") node.query("INSERT INTO t VALUES (now());") assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;") + + +def test_log_max_size(start_cluster): + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 1000000 + 10 + 10 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + for i in range(10): + node.query(f"select {i}") + + assert node.query("select count() >= 10 from system.query_log") == "1\n" + node.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) + + +def test_log_buffer_size_rows_flush_threshold(start_cluster): + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 1000000 + 10 + 10000 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") + for i in range(10): + node.query(f"select {i}") + + assert_eq_with_retry( + node, + f"select count() >= 11 from system.query_log", + "1", + sleep_time=0.2, + retry_count=100, + ) + + node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 1000000 + 10000 + 10000 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + for i in range(10): + node.query(f"select {i}") + + # Logs aren't flushed + assert_eq_with_retry( + node, + f"select count() < 10 from system.query_log", + "1", + sleep_time=0.2, + retry_count=100, + ) + + node.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) diff --git a/tests/integration/test_system_logs/test_system_logs.py b/tests/integration/test_system_logs/test_system_logs.py index aac5ee53819..72249cd64ee 100644 --- a/tests/integration/test_system_logs/test_system_logs.py +++ b/tests/integration/test_system_logs/test_system_logs.py @@ -88,3 +88,53 @@ def test_system_logs_settings_expr(start_cluster): assert expected in node3.query( "SELECT engine_full FROM system.tables WHERE database='system' and name='query_log'" ) + + +def test_max_size_0(start_cluster): + node1.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 0 + 0 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + with pytest.raises(Exception): + node1.restart_clickhouse() + + node1.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) + node1.restart_clickhouse() + + +def test_reserved_size_greater_max_size(start_cluster): + node1.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 10 + 11 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + with pytest.raises(Exception): + node1.restart_clickhouse() + + node1.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) + node1.restart_clickhouse() diff --git a/tests/integration/test_system_start_stop_listen/__init__.py b/tests/integration/test_system_start_stop_listen/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_start_stop_listen/configs/cluster.xml b/tests/integration/test_system_start_stop_listen/configs/cluster.xml new file mode 100644 index 00000000000..34b6c32c6d0 --- /dev/null +++ 
b/tests/integration/test_system_start_stop_listen/configs/cluster.xml @@ -0,0 +1,16 @@ + + + + + + main_node + 9000 + + + backup_node + 9000 + + + + + diff --git a/tests/integration/test_system_start_stop_listen/configs/protocols.xml b/tests/integration/test_system_start_stop_listen/configs/protocols.xml new file mode 100644 index 00000000000..1d8608bcaca --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/protocols.xml @@ -0,0 +1,23 @@ + + 0.0.0.0 + + + 9000 + 8123 + 9004 + + + + + tcp + 0.0.0.0 + 9001 + native protocol (tcp) + + + http + 8124 + http protocol + + + diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py new file mode 100644 index 00000000000..8a3081e0c15 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import Client +import requests + +cluster = ClickHouseCluster(__file__) +main_node = cluster.add_instance( + "main_node", + main_configs=["configs/cluster.xml", "configs/protocols.xml"], + with_zookeeper=True, +) +backup_node = cluster.add_instance( + "backup_node", main_configs=["configs/cluster.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def http_works(port=8123): + try: + response = requests.post(f"http://{main_node.ip_address}:{port}/ping") + if response.status_code == 400: + return True + except: + pass + + return False + + +def assert_everything_works(): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() + assert http_works(8124) + + +QUERY = "SELECT 1" +MYSQL_QUERY = "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', 'default', '', SETTINGS connect_timeout = 100, connection_wait_timeout = 100)" + + +def test_default_protocols(started_cluster): + # TCP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN TCP") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default TCP") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN HTTP") + assert http_works() == False + main_node.query("SYSTEM START LISTEN HTTP") + + # MySQL + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN MYSQL") + assert "Connections to mysql failed" in main_node.query_and_get_error(MYSQL_QUERY) + main_node.query("SYSTEM START LISTEN MYSQL") + + assert_everything_works() + + +def test_custom_protocols(started_cluster): + # TCP + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'tcp'") + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + main_node.query("SYSTEM START LISTEN CUSTOM 'tcp'") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'http'") + assert http_works(8124) == False + main_node.query("SYSTEM START LISTEN CUSTOM 'http'") + + assert_everything_works() + + +def test_all_protocols(started_cluster): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + + # STOP LISTEN QUERIES ALL + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + 
assert "Connection refused" in main_node.query_and_get_error(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() == False + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + # STOP LISTEN QUERIES DEFAULT + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES DEFAULT") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + + # STOP LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + + # Disable all protocols, check first START LISTEN QUERIES DEFAULT then START LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() + + +def test_except(started_cluster): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + + # STOP LISTEN QUERIES ALL EXCEPT + main_node.query("SYSTEM STOP LISTEN QUERIES ALL EXCEPT MYSQL, CUSTOM 'tcp'") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(MYSQL_QUERY) + assert http_works() == False + assert http_works(8124) == False + + # START LISTEN QUERIES ALL EXCEPT + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL EXCEPT TCP") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(MYSQL_QUERY) + assert http_works() == True + assert http_works(8124) == True + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() + + # STOP LISTEN QUERIES DEFAULT EXCEPT + main_node.query("SYSTEM STOP LISTEN QUERIES DEFAULT EXCEPT TCP") + main_node.query(QUERY) + assert "Connections to mysql failed" in custom_client.query_and_get_error( + MYSQL_QUERY + ) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) == True + + # START LISTEN QUERIES DEFAULT EXCEPT + backup_node.query( + "SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT EXCEPT HTTP" + ) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) == True + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() + + # STOP LISTEN QUERIES CUSTOM EXCEPT + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM EXCEPT CUSTOM 'tcp'") + main_node.query(QUERY) + custom_client.query(MYSQL_QUERY) + custom_client.query(QUERY) 
+ assert http_works() == True + assert http_works(8124) == False + + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM") + + # START LISTEN QUERIES DEFAULT EXCEPT + backup_node.query( + "SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM EXCEPT CUSTOM 'tcp'" + ) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() == True + assert http_works(8124) == True + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() diff --git a/tests/integration/test_table_functions_access_rights/test.py b/tests/integration/test_table_functions_access_rights/test.py index 09a05122c07..b1d1a291bc5 100644 --- a/tests/integration/test_table_functions_access_rights/test.py +++ b/tests/integration/test_table_functions_access_rights/test.py @@ -40,7 +40,7 @@ def test_merge(): instance.query("CREATE USER A") assert ( - "it's necessary to have grant CREATE TEMPORARY TABLE ON *.*" + "it's necessary to have the grant CREATE TEMPORARY TABLE ON *.*" in instance.query_and_get_error(select_query, user="A") ) @@ -62,7 +62,7 @@ def test_merge(): instance.query("GRANT SELECT ON default.table1 TO A") instance.query("GRANT INSERT ON default.table2 TO A") assert ( - "it's necessary to have grant SELECT ON default.table2" + "it's necessary to have the grant SELECT ON default.table2" in instance.query_and_get_error(select_query, user="A") ) diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py index e6142c7eef1..ed06a70cf5a 100644 --- a/tests/integration/test_temporary_data_in_cache/test.py +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -2,6 +2,7 @@ # pylint: disable=redefined-outer-name import pytest +import fnmatch from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException @@ -68,7 +69,9 @@ def test_cache_evicted_by_temporary_data(start_cluster): "max_bytes_before_external_sort": "4M", }, ) - assert "Failed to reserve space for the file cache" in str(exc.value) + assert fnmatch.fnmatch( + str(exc.value), "*Failed to reserve * for temporary file*" + ), exc.value # Some data evicted from cache by temporary data cache_size_after_eviction = get_cache_size() @@ -104,6 +107,8 @@ def test_cache_evicted_by_temporary_data(start_cluster): "SELECT randomPrintableASCII(1024) FROM numbers(32 * 1024) FORMAT TSV", params={"buffer_size": 0, "wait_end_of_query": 1}, ) - assert "Failed to reserve space for the file cache" in str(exc.value) + assert fnmatch.fnmatch( + str(exc.value), "*Failed to reserve * for temporary file*" + ), exc.value q("DROP TABLE IF EXISTS t1") diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index ff8e7154d0d..a27bb472ea8 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -114,7 +114,11 @@ def node_update_config(mode, setting, value=None): def assert_took(took, should_took): - assert took >= should_took[0] * 0.9 and took < should_took[1] + # we need to decrease the lower limit because the server limits could + # be enforced by throttling some server background IO instead of query IO + # and we have no control over it + # Note that the throttler does not apply any restriction on the upper bound, so we can only tell how much time is required "at least", not "at most" + assert took >= should_took * 0.85 @@ -129,7 +133,7 @@ def
assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_local_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds @@ -139,7 +143,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7, 14), + 7, id="user_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -149,7 +153,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_local_throttling", ), # @@ -161,7 +165,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_remote_to_local_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds @@ -171,7 +175,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7, 14), + 7, id="user_remote_to_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -181,7 +185,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_to_local_throttling", ), # @@ -193,7 +197,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_remote_to_remote_throttling", ), # No throttling for S3-to-S3, uses native copy @@ -203,7 +207,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (0, 3), + 0, id="user_remote_to_remote_throttling", ), # No throttling for S3-to-S3, uses native copy @@ -213,7 +217,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (0, 3), + 0, id="server_remote_to_remote_throttling", ), # @@ -230,7 +234,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_local_to_remote_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds, but for S3Client it is 2x more @@ -240,7 +244,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7 * 3, 7 * 4 - 1), + 7 * 3, id="user_local_to_remote_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds, but for S3Client it is 2x more @@ -250,7 +254,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3 * 3, 3 * 5), + 3 * 3, id="server_local_to_remote_throttling", ), ], @@ -287,7 +291,7 @@ def test_backup_throttling_override(): }, ) # reading 1e6*8 bytes with 500Ki default bandwith should take (8-0.5)/0.5=15 seconds - assert_took(took, (15, 20)) + assert_took(took, 15) @pytest.mark.parametrize( @@ -296,14 +300,14 @@ def test_backup_throttling_override(): # # Local # - pytest.param("default", None, None, None, (0, 3), id="no_local_throttling"), + pytest.param("default", None, None, None, 0, id="no_local_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "default", "user", "max_local_read_bandwidth", "1M", - (7, 14), + 7, id="user_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -312,20 +316,20 @@ def test_backup_throttling_override(): "server", "max_local_read_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_local_throttling", ), # # Remote # - pytest.param("s3", None, None, None, (0, 3), id="no_remote_throttling"), + pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "s3", "user", 
"max_remote_read_network_bandwidth", "1M", - (7, 14), + 7, id="user_remote_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -334,7 +338,7 @@ def test_backup_throttling_override(): "server", "max_remote_read_network_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_throttling", ), ], @@ -358,14 +362,14 @@ def test_read_throttling(policy, mode, setting, value, should_took): # # Local # - pytest.param("default", None, None, None, (0, 3), id="no_local_throttling"), + pytest.param("default", None, None, None, 0, id="no_local_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "default", "user", "max_local_write_bandwidth", "1M", - (7, 14), + 7, id="local_user_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -374,29 +378,29 @@ def test_read_throttling(policy, mode, setting, value, should_took): "server", "max_local_write_bandwidth_for_server", "2M", - (3, 7), + 3, id="local_server_throttling", ), # # Remote # - pytest.param("s3", None, None, None, (0, 3), id="no_remote_throttling"), - # writeing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), + # writing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "s3", "user", "max_remote_write_network_bandwidth", "1M", - (7, 14), + 7, id="user_remote_throttling", ), - # writeing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # writing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds pytest.param( "s3", "server", "max_remote_write_network_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_throttling", ), ], diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index f5c2be51ed7..87c03c56f91 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -96,7 +96,9 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -202,7 +204,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -222,7 +226,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py index a12d30915dd..46660581223 100644 --- a/tests/integration/test_transactions/test.py +++ b/tests/integration/test_transactions/test.py @@ -105,8 +105,6 @@ def test_rollback_unfinished_on_restart1(start_cluster): "0_4_4_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" 
"0_8_8_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" - "1_1_1_1_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_5_5_0\t1\ttid6\tcsn_6\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" @@ -194,6 +192,5 @@ def test_rollback_unfinished_on_restart2(start_cluster): "0_4_4_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "0_5_5_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" ) diff --git a/tests/integration/test_truncate_database/__init__.py b/tests/integration/test_truncate_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_truncate_database/configs/distributed_servers.xml b/tests/integration/test_truncate_database/configs/distributed_servers.xml new file mode 100644 index 00000000000..68b420f36b4 --- /dev/null +++ b/tests/integration/test_truncate_database/configs/distributed_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_truncate_database/configs/replicated_servers.xml b/tests/integration/test_truncate_database/configs/replicated_servers.xml new file mode 100644 index 00000000000..8e318d385c3 --- /dev/null +++ b/tests/integration/test_truncate_database/configs/replicated_servers.xml @@ -0,0 +1,21 @@ + + + + + true + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_truncate_database/test_distributed.py b/tests/integration/test_truncate_database/test_distributed.py new file mode 100644 index 00000000000..fed3d16d190 --- /dev/null +++ b/tests/integration/test_truncate_database/test_distributed.py @@ -0,0 +1,53 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["configs/distributed_servers.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "node2", with_zookeeper=True, main_configs=["configs/distributed_servers.xml"] +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in (node1, node2): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id + """ + ) + + node1.query("INSERT INTO test.local_table VALUES (1, 'node1')") + node2.query("INSERT INTO test.local_table VALUES (2, 'node2')") + + node1.query( + "CREATE TABLE test.distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, test, local_table)" + ) + node2.query( + "CREATE TABLE test.distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, test, local_table)" + ) + + yield cluster + + finally: + cluster.shutdown() + + +def test_truncate_database_distributed(started_cluster): + query1 = "SELECT count() FROM test.distributed WHERE (id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))" + query2 = "SELECT sum((id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))) FROM test.distributed" + assert node1.query(query1) == "1\n" + assert node1.query(query2) == "1\n" + assert node2.query(query1) == 
"1\n" + assert node2.query(query2) == "1\n" + assert node2.query("SHOW DATABASES LIKE 'test'") == "test\n" + node1.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC") + assert node2.query("SHOW TABLES FROM test") == "" diff --git a/tests/integration/test_truncate_database/test_replicated.py b/tests/integration/test_truncate_database/test_replicated.py new file mode 100644 index 00000000000..59830d44378 --- /dev/null +++ b/tests/integration/test_truncate_database/test_replicated.py @@ -0,0 +1,56 @@ +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + +def fill_nodes(nodes, shard): + for node in nodes: + node.query( + """ + CREATE DATABASE test; + + CREATE TABLE test.test_table(date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date); + """.format( + shard=shard, replica=node.name + ) + ) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) +node2 = cluster.add_instance( + "node2", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) +node3 = cluster.add_instance( + "node3", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + fill_nodes([node1, node2, node3], 1) + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_truncate_database_replicated(start_cluster): + node1.query( + "INSERT INTO test.test_table SELECT number, toString(number) FROM numbers(100)" + ) + assert node2.query("SELECT id FROM test.test_table LIMIT 1") == "0\n" + assert node3.query("SHOW DATABASES LIKE 'test'") == "test\n" + node3.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC") + assert node2.query("SHOW TABLES FROM test") == "" diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index a2f28e21666..c1c076277bb 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1529,106 +1529,6 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) -@pytest.mark.skip(reason="Flacky test") -@pytest.mark.parametrize( - "name,positive", - [ - pytest.param("test_double_move_while_select_negative", 0, id="negative"), - pytest.param("test_double_move_while_select_positive", 1, id="positive"), - ], -) -def test_double_move_while_select(started_cluster, name, positive): - name = unique_table_name(name) - - try: - node1.query( - """ - CREATE TABLE {name} ( - n Int64, - s String - ) ENGINE = MergeTree - ORDER BY tuple() - PARTITION BY n - SETTINGS storage_policy='small_jbod_with_external',temporary_directories_lifetime=1 - """.format( - name=name - ) - ) - - node1.query( - "INSERT INTO {name} VALUES (1, randomPrintableASCII(10*1024*1024))".format( - name=name - ) - ) - - parts = node1.query( - "SELECT name FROM system.parts WHERE table = '{name}' AND active = 1".format( - name=name - ) - ).splitlines() - assert len(parts) == 1 - - node1.query( - "ALTER TABLE {name} MOVE PART '{part}' TO DISK 'external'".format( - name=name, part=parts[0] - ) - ) - - def long_select(): - if positive: - node1.query( - "SELECT sleep(3), sleep(2), sleep(1), n FROM {name}".format( - name=name - ) - ) - - thread = threading.Thread(target=long_select) - 
thread.start() - - time.sleep(1) - - node1.query( - "ALTER TABLE {name} MOVE PART '{part}' TO DISK 'jbod1'".format( - name=name, part=parts[0] - ) - ) - - # Fill jbod1 to force ClickHouse to make move of partition 1 to external. - node1.query( - "INSERT INTO {name} VALUES (2, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - node1.query( - "INSERT INTO {name} VALUES (3, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - node1.query( - "INSERT INTO {name} VALUES (4, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - - wait_parts_mover(node1, name, retry_count=40) - - # If SELECT locked old part on external, move shall fail. - assert node1.query( - "SELECT disk_name FROM system.parts WHERE table = '{name}' AND active = 1 AND name = '{part}'".format( - name=name, part=parts[0] - ) - ).splitlines() == ["jbod1" if positive else "external"] - - thread.join() - - assert node1.query( - "SELECT n FROM {name} ORDER BY n".format(name=name) - ).splitlines() == ["1", "2", "3", "4"] - - finally: - node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) - - @pytest.mark.parametrize( "name,engine,positive", [ diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 3b031569b8a..117ebe37dd2 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -23,6 +23,7 @@ node4 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node5 = cluster.add_instance( @@ -35,6 +36,7 @@ node5 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node6 = cluster.add_instance( "node6", @@ -46,6 +48,7 @@ node6 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) diff --git a/tests/integration/test_user_directories/test.py b/tests/integration/test_user_directories/test.py index 45afb86f464..704fb30b2fd 100644 --- a/tests/integration/test_user_directories/test.py +++ b/tests/integration/test_user_directories/test.py @@ -38,14 +38,14 @@ def test_old_style(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users2.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access2\\\\/"}', 2, ], @@ -62,20 +62,20 @@ def test_local_directories(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users3.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access3\\\\/"}', 2, ], [ "local directory (ro)", - "local directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access3-ro\\\\/","readonly":true}', 3, ], @@ -92,8 +92,8 @@ def test_relative_path(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users4.xml"}', 1, ] @@ -110,8 +110,8 @@ def test_memory(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users5.xml"}', 1, ], @@ -129,20 +129,20 @@ def 
test_mixed_style(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users6.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access6\\\\/"}', 2, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access6a\\\\/"}', 3, ], @@ -160,14 +160,14 @@ def test_duplicates(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users7.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access7\\\\/"}', 2, ], diff --git a/tests/integration/test_version_update/test.py b/tests/integration/test_version_update/test.py index 3332fe69e86..b8fa3e7ebb4 100644 --- a/tests/integration/test_version_update/test.py +++ b/tests/integration/test_version_update/test.py @@ -15,6 +15,7 @@ node2 = cluster.add_instance( tag="21.2", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) # Use differents nodes because if there is node.restart_from_latest_version(), then in later tests @@ -25,6 +26,7 @@ node3 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -32,6 +34,7 @@ node4 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node5 = cluster.add_instance( "node5", @@ -39,6 +42,7 @@ node5 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node6 = cluster.add_instance( "node6", @@ -46,6 +50,7 @@ node6 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 416220c93c3..f3ae190ee46 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -16,6 +16,7 @@ node1 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -27,6 +28,7 @@ node2 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node3 = cluster.add_instance( "node3", @@ -38,6 +40,7 @@ node3 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) @@ -51,12 +54,6 @@ def start_cluster(): cluster.shutdown() -def restart_node(node): - # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old) - node.put_users_config("configs/force_remove_data_recursively_on_drop.xml") - node.restart_with_latest_version(signal=9, fix_metadata=True) - - def test_mutate_and_upgrade(start_cluster): for node in [node1, node2]: node.query("DROP TABLE IF EXISTS mt") @@ -73,9 +70,10 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader - - restart_node(node1) - restart_node(node2) + node1.query("SYSTEM FLUSH LOGS") + node2.query("SYSTEM FLUSH LOGS") + node1.restart_with_latest_version(signal=9, 
fix_metadata=True) + node2.restart_with_latest_version(signal=9, fix_metadata=True) # After hard restart table can be in readonly mode exec_query_with_retry( @@ -131,7 +129,7 @@ def test_upgrade_while_mutation(start_cluster): # (We could be in process of creating some system table, which will leave empty directory on restart, # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files) node3.query("SYSTEM FLUSH LOGS") - restart_node(node3) + node3.restart_with_latest_version(signal=9, fix_metadata=True) # checks for readonly exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60) diff --git a/tests/integration/test_wrong_db_or_table_name/__init__.py b/tests/integration/test_wrong_db_or_table_name/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_wrong_db_or_table_name/test.py b/tests/integration/test_wrong_db_or_table_name/test.py new file mode 100644 index 00000000000..68af383b6c3 --- /dev/null +++ b/tests/integration/test_wrong_db_or_table_name/test.py @@ -0,0 +1,108 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def start(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_wrong_database_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Database tes does not exist. Maybe you meant test?.", + ): + node.query("SELECT * FROM tes.table_test LIMIT 1;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP DATABASE test; + """ + ) + + +def test_drop_wrong_database_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Database tes does not exist. Maybe you meant test?.", + ): + node.query("DROP DATABASE tes;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query("DROP DATABASE test;") + + +def test_wrong_table_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + CREATE TABLE test.table_test2 (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Table test.table_test1 does not exist. Maybe you meant table_test?.", + ): + node.query( + """ + SELECT * FROM test.table_test1 LIMIT 1; + """ + ) + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP TABLE test.table_test2; + DROP DATABASE test; + """ + ) + + +def test_drop_wrong_table_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Table test.table_tes does not exist. 
Maybe you meant table_test?.", + ): + node.query("DROP TABLE test.table_tes;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP DATABASE test; + """ + ) diff --git a/tests/integration/test_zero_copy_fetch/configs/users.xml b/tests/integration/test_zero_copy_fetch/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index 4f3d42096c3..dc79e5d8723 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -19,12 +19,14 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) cluster.add_instance( "node2", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py index 65f82c2286b..0c0f77ec597 100644 --- a/tests/integration/test_zookeeper_config/test.py +++ b/tests/integration/test_zookeeper_config/test.py @@ -2,6 +2,7 @@ import time import pytest import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster( __file__, zookeeper_config_path="configs/zookeeper_config_root_a.xml" @@ -56,10 +57,10 @@ def test_chroot_with_same_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple VALUES ({0}, {0})".format(i)) - time.sleep(1) - - assert node1.query("select count() from simple").strip() == "2" - assert node2.query("select count() from simple").strip() == "2" + node1.query("SYSTEM SYNC REPLICA simple") + assert_eq_with_retry(node1, "select count() from simple", "2") + node2.query("SYSTEM SYNC REPLICA simple") + assert_eq_with_retry(node2, "select count() from simple", "2") def test_chroot_with_different_root(started_cluster): @@ -76,5 +77,7 @@ def test_chroot_with_different_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple_different VALUES ({0}, {0})".format(i)) - assert node1.query("select count() from simple_different").strip() == "1" - assert node3.query("select count() from simple_different").strip() == "1" + node1.query("SYSTEM SYNC REPLICA simple_different") + assert_eq_with_retry(node1, "select count() from simple_different", "1") + node3.query("SYSTEM SYNC REPLICA simple_different") + assert_eq_with_retry(node3, "select count() from simple_different", "1") diff --git a/tests/integration/test_zookeeper_fallback_session/__init__.py b/tests/integration/test_zookeeper_fallback_session/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml b/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml new file mode 100644 index 00000000000..63fdcea5dab --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml @@ -0,0 +1,23 @@ + + + + + + node1 + 9000 + + + + node2 + 9000 + + + + node3 + 9000 + + + + + + diff --git 
a/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml b/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml new file mode 100644 index 00000000000..b0844ab4e73 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml @@ -0,0 +1,23 @@ + + + + in_order + + 2 + 4 + + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 500 + + diff --git a/tests/integration/test_zookeeper_fallback_session/test.py b/tests/integration/test_zookeeper_fallback_session/test.py new file mode 100644 index 00000000000..570eca4f0a6 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/test.py @@ -0,0 +1,101 @@ +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager + + +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) + +node1 = cluster.add_instance( + "node1", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node2 = cluster.add_instance( + "node2", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node3 = cluster.add_instance( + "node3", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for node in [node1, node2, node3]: + node.query("DROP TABLE IF EXISTS simple SYNC") + node.query( + """ + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id; + """.format( + replica=node.name + ) + ) + yield cluster + finally: + cluster.shutdown() + + +def assert_uses_zk_node(node: ClickHouseInstance, zk_node): + def check_callback(host): + return host.strip() == expected_zk_ip_addr + + expected_zk_ip_addr = node.cluster.get_instance_ip(zk_node) + + host = node.query_with_retry( + "select host from system.zookeeper_connection", check_callback=check_callback + ) + assert host.strip() == expected_zk_ip_addr + + +def test_fallback_session(started_cluster: ClickHouseCluster): + # only leave connecting to zoo3 possible + with PartitionManager() as pm: + for node in started_cluster.instances.values(): + for zk in ["zoo1", "zoo2"]: + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip(zk), + "action": "REJECT --reject-with tcp-reset", + } + ) + + for node in [node1, node2, node3]: + # all nodes will have to switch to zoo3 + assert_uses_zk_node(node, "zoo3") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(1)) + + # and replication still works + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "1", + ) + == "1\n" + ) + + # at this point network partitioning has been reverted. + # the nodes should switch to zoo1 automatically because of `in_order` load-balancing. 
+ # otherwise they would connect to a random replica + for node in [node1, node2, node3]: + assert_uses_zk_node(node, "zoo1") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(2)) + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "2", + ) + == "2\n" + ) diff --git a/tests/performance/README.md b/tests/performance/README.md index c0c055bba97..f554e96203b 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -4,11 +4,11 @@ This directory contains `.xml`-files with performance tests for @akuzm tool. ### How to write performance test -First of all you should check existing tests don't cover your case. If there are no such tests than you should write your own. +First of all you should check existing tests don't cover your case. If there are no such tests then you should write your own. You can use `substitions`, `create`, `fill` and `drop` queries to prepare test. You can find examples in this folder. -If your test continued more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones. +If your test takes more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones. ### How to run performance test diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml new file mode 100644 index 00000000000..91763c69bb9 --- /dev/null +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -0,0 +1,32 @@ + + + 8 + 0 + 4 + + + + CREATE TABLE t_nullable + ( + key_string1 Nullable(String), + key_string2 Nullable(String), + key_string3 Nullable(String), + key_int64_1 Nullable(Int64), + key_int64_2 Nullable(Int64), + key_int64_3 Nullable(Int64), + key_int64_4 Nullable(Int64), + key_int64_5 Nullable(Int64), + m1 Int64, + m2 Int64 + ) + ENGINE = Memory + + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(20000000) + select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3 + select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2 + select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5 + select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10 + select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10 + + drop table if exists t_nullable + \ No newline at end of file diff --git a/tests/performance/count_from_formats.xml b/tests/performance/count_from_formats.xml new file mode 100644 index 00000000000..c55d6f56047 --- /dev/null +++ b/tests/performance/count_from_formats.xml @@ -0,0 +1,68 @@ + + + 
+ 0 + + + + + format + + TabSeparated + TabSeparatedWithNames + TabSeparatedWithNamesAndTypes + CSV + CSVWithNames + CSVWithNamesAndTypes + CustomSeparated + CustomSeparatedWithNames + CustomSeparatedWithNamesAndTypes + Values + JSONEachRow + JSONCompactEachRow + JSONCompactEachRowWithNames + JSONCompactEachRowWithNamesAndTypes + JSON + JSONCompact + JSONColumns + JSONCompactColumns + JSONColumnsWithMetadata + JSONObjectEachRow + BSONEachRow + TSKV + Avro + MsgPack + + + + format_fast + + Protobuf + ProtobufList + CapnProto + ORC + Parquet + + + + +CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}) AS test.hits +CREATE TABLE IF NOT EXISTS table_{format_fast} (s String) ENGINE = File({format_fast}) + +INSERT INTO table_{format} SELECT * FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert=1 +INSERT INTO table_{format_fast} SELECT randomString(1000) FROM numbers(1000000) SETTINGS engine_file_truncate_on_insert=1 + +SELECT count() FROM table_{format} FORMAT Null +SELECT count() FROM table_{format_fast} FORMAT Null + +SELECT count() FROM table_{format} group by _file, _path FORMAT Null +SELECT count() FROM table_{format_fast} group by _file, _path FORMAT Null + +SELECT _path, _file FROM table_{format} group by _file, _path FORMAT Null +SELECT _path, _file FROM table_{format_fast} group by _file, _path FORMAT Null + +DROP TABLE IF EXISTS table_{format} +DROP TABLE IF EXISTS table_{format_fast} + + + diff --git a/tests/performance/duplicate_order_by_and_distinct.xml b/tests/performance/duplicate_order_by_and_distinct.xml deleted file mode 100644 index e36bc470512..00000000000 --- a/tests/performance/duplicate_order_by_and_distinct.xml +++ /dev/null @@ -1,8 +0,0 @@ - - 1 - - - SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null - diff --git a/tests/performance/encrypt_decrypt_empty_string_slow.xml b/tests/performance/encrypt_decrypt_empty_string_slow.xml deleted file mode 100644 index 4218f377c8c..00000000000 --- a/tests/performance/encrypt_decrypt_empty_string_slow.xml +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - func - - - encrypt('aes-128-cbc', materialize(plaintext), key16, iv16) - encrypt('aes-128-ecb', materialize(plaintext), key16) - encrypt('aes-128-gcm', materialize(plaintext), key16, iv12, 'aadaadaadaad') - - encrypt('aes-192-cbc', materialize(plaintext), key24, iv16) - encrypt('aes-192-ecb', materialize(plaintext), key24) - encrypt('aes-192-gcm', materialize(plaintext), key24, iv12, 'aadaadaadaad') - - encrypt('aes-256-cbc', materialize(plaintext), key32, iv16) - encrypt('aes-256-ecb', materialize(plaintext), key32) - encrypt('aes-256-gcm', materialize(plaintext), key32, iv12, 'aadaadaadaad') - - - decrypt('aes-128-cbc', encrypt('aes-128-cbc', materialize(plaintext), key16, iv16), key16, iv16) - decrypt('aes-128-ecb', encrypt('aes-128-ecb', materialize(plaintext), key16), key16) - decrypt('aes-128-gcm', encrypt('aes-128-gcm', materialize(plaintext), key16, iv12, 'aadaadaadaad'), key16, iv12, 'aadaadaadaad') - - decrypt('aes-192-cbc', encrypt('aes-192-cbc', materialize(plaintext), key24, iv16), key24, iv16) - decrypt('aes-192-ecb', encrypt('aes-192-ecb', materialize(plaintext), key24), key24) - decrypt('aes-192-gcm', 
encrypt('aes-192-gcm', materialize(plaintext), key24, iv12, 'aadaadaadaad'), key24, iv12, 'aadaadaadaad') - - decrypt('aes-256-cbc', encrypt('aes-256-cbc', materialize(plaintext), key32, iv16), key32, iv16) - decrypt('aes-256-ecb', encrypt('aes-256-ecb', materialize(plaintext), key32), key32) - decrypt('aes-256-gcm', encrypt('aes-256-gcm', materialize(plaintext), key32, iv12, 'aadaadaadaad'), key32, iv12, 'aadaadaadaad') - - - - - table - - numbers(2000000) - - - - plaintext - - '' - - - - - - WITH {plaintext} as plaintext, repeat('k', 32) as key32, substring(key32, 1, 24) as key24, substring(key32, 1, 16) as key16, repeat('iv', 8) as iv16, substring(iv16, 1, 12) as iv12 SELECT count() FROM {table} WHERE NOT ignore({func}) LIMIT 1 - - WITH {plaintext} as plaintext, repeat('k', 32) as key32, substring(key32, 1, 24) as key24, substring(key32, 1, 16) as key16, repeat('iv', 8) as iv16, substring(iv16, 1, 12) as iv12 SELECT count() FROM {table} WHERE NOT ignore({func}) - diff --git a/tests/performance/join_filter_pushdown.xml b/tests/performance/join_filter_pushdown.xml new file mode 100644 index 00000000000..3adbbb3029e --- /dev/null +++ b/tests/performance/join_filter_pushdown.xml @@ -0,0 +1,9 @@ + + create table t(a UInt64) engine=MergeTree order by tuple() + insert into t select * from numbers_mt(5e6) + + select * from t as t0 inner join t as t1 using(a) where t1.a = 100 + + drop table t + + diff --git a/tests/performance/lower_upper_function.xml b/tests/performance/lower_upper_function.xml deleted file mode 100644 index 1b84a334ace..00000000000 --- a/tests/performance/lower_upper_function.xml +++ /dev/null @@ -1,11 +0,0 @@ - - select lower(randomString(16)) - select lower(randomString(32)) - select lower(randomString(64)) - select lower(randomString(128)) - select lower(randomString(256)) - select lower(randomString(512)) - select lower(randomString(1024)) - select lower(randomString(832)) - select lower(randomString(416)) - diff --git a/tests/performance/parquet_filter.xml b/tests/performance/parquet_filter.xml new file mode 100644 index 00000000000..27bcb15ee5e --- /dev/null +++ b/tests/performance/parquet_filter.xml @@ -0,0 +1,9 @@ + + create table if not exists t (key UInt64, value String) engine = File(Parquet) settings output_format_parquet_use_custom_encoder=1, output_format_parquet_row_group_size=100000 + + insert into t select number, toString(number) from numbers(2000000) settings max_threads=16, max_insert_threads=16, max_insert_block_size=100000, max_block_size=100000 + + select sum(cityHash64(*)) from t where key between 1050000 and 1150000 settings max_threads=1 + + drop table if exists t + diff --git a/tests/performance/prepare_hash_before_merge.xml b/tests/performance/prepare_hash_before_merge.xml new file mode 100644 index 00000000000..a96d5d9f95c --- /dev/null +++ b/tests/performance/prepare_hash_before_merge.xml @@ -0,0 +1,6 @@ + + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 24 + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 56 + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 64 + SELECT COUNT(DISTINCT Referer) FROM test.hits SETTINGS max_threads = 22 + diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml new file mode 100644 index 00000000000..2fc63c2c926 --- /dev/null +++ b/tests/performance/storage_join_direct_join.xml @@ -0,0 +1,19 @@ + + + 1 + + + CREATE TABLE keys (key UInt64) ENGINE = MergeTree ORDER BY key; + CREATE TABLE dict (key UInt64, 
value1 UInt64, value2 Float64, value3 String, + value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, + value10 String) ENGINE = Join(ANY, LEFT, key); + + INSERT INTO keys SELECT rand() FROM numbers(10000000); + INSERT INTO dict SELECT rand(), rand()%1000, rand()*0.0001, toString(number), + toString(number), toString(number), toString(number), toString(number), toString(number), + toString(number), toString(number) FROM numbers(1000000); + + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS + allow_experimental_analyzer=1 + \ No newline at end of file diff --git a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml new file mode 100644 index 00000000000..64e4cf1cc0d --- /dev/null +++ b/tests/performance/uniq_to_count.xml @@ -0,0 +1,8 @@ + + select uniq(number) from (select DISTINCT number from numbers(1000000)) + select uniq(number) from (select number from numbers(1000000) group by number) + + + select uniq(number) from (select DISTINCT number from numbers(1000000)) SETTINGS allow_experimental_analyzer=1 + select uniq(number) from (select number from numbers(1000000) group by number) SETTINGS allow_experimental_analyzer=1 + diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference index 11b660b54a3..00a2cd14700 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -1,5 +1,6 @@ runtime messages 0.001 runtime exceptions 0.05 +unknown runtime exceptions 0.01 messages shorter than 10 1 messages shorter than 16 3 exceptions shorter than 30 3 diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 86fe01dc0e3..f4ec9b79a4c 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -9,14 +9,21 @@ create view logs as select * from system.text_log where now() - toIntervalMinute -- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 -select 'runtime messages', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs; +select 'runtime messages', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs + where message not like '% Received from %clickhouse-staging.com:9440%'; -- Check the same for exceptions. 
The value was 0.03 -select 'runtime exceptions', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs where message like '%DB::Exception%'; +select 'runtime exceptions', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs + where (message like '%DB::Exception%' or message like '%Coordination::Exception%') + and message not like '% Received from %clickhouse-staging.com:9440%'; + +select 'unknown runtime exceptions', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.01) from logs where + (message like '%DB::Exception%' or message like '%Coordination::Exception%') + and message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%'; -- FIXME some of the following messages are not informative and it has to be fixed create temporary table known_short_messages (s String) as select * from (select -['', '({}) Keys: {}', '({}) {}', 'Aggregating', 'Became leader', 'Cleaning queue', +['', '{} ({})', '({}) Keys: {}', '({}) {}', 'Aggregating', 'Became leader', 'Cleaning queue', 'Creating set.', 'Cyclic aliases', 'Detaching {}', 'Executing {}', 'Fire events: {}', 'Found part {}', 'Loaded queue', 'No sharding key', 'No tables', 'Query: {}', 'Removed', 'Removed part {}', 'Removing parts.', 'Request URI: {}', 'Sending part {}', @@ -32,11 +39,13 @@ create temporary table known_short_messages (s String) as select * from (select 'brotli decode error{}', 'Invalid H3 index: {}', 'Too large node state size', 'No additional keys found.', 'Attempt to read after EOF.', 'Replication was stopped', '{} building file infos', 'Cannot parse uuid {}', 'Query was cancelled', 'Cancelled merging parts', 'Cancelled mutating parts', 'Log pulling is cancelled', -'Transaction was cancelled', 'Could not find table: {}', 'Table {} doesn''t exist', -'Database {} doesn''t exist', 'Dictionary ({}) not found', 'Unknown table function {}', +'Transaction was cancelled', 'Could not find table: {}', 'Table {} does not exist', +'Database {} does not exist', 'Dictionary ({}) not found', 'Unknown table function {}', 'Unknown format {}', 'Unknown explain kind ''{}''', 'Unknown setting {}', 'Unknown input format {}', 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', -'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'API mode: {}' +'Attempt to read after eof', 'String size is too big ({}), maximum: {}', +'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', +'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64' ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. @@ -48,7 +57,7 @@ select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_st -- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.: -- "Expected end of line" -> "Code: 117. DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)" -- But we have to cut out the boilerplate, e.g.: --- "Code: 60. DB::Exception: Table default.a doesn't exist. (UNKNOWN_TABLE), Stack trace" -> "Table default.a doesn't exist." +-- "Code: 60. DB::Exception: Table default.a does not exist. (UNKNOWN_TABLE), Stack trace" -> "Table default.a does not exist." 
-- This table currently doesn't have enough information to do this reliably, so we just regex search for " (ERROR_NAME_IN_CAPS)" and hope that's good enough. -- For the "Code: 123. DB::Exception: " part, we just subtract 26 instead of searching for it. Because sometimes it's not at the start, e.g.: -- "Unexpected error, will try to restart main thread: Code: 341. DB::Exception: Unexpected error: Code: 57. DB::Exception:[...]" diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.sql b/tests/queries/0_stateless/00061_merge_tree_alter.sql index 2e46b1e16d6..f2a36d6e5a3 100644 --- a/tests/queries/0_stateless/00061_merge_tree_alter.sql +++ b/tests/queries/0_stateless/00061_merge_tree_alter.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS alter_00061; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference b/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference index f0b6f0e0c41..e4690f20d3e 100644 --- a/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference +++ b/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference @@ -2,8 +2,8 @@ A B numbers one -A 1 TinyLog CREATE TABLE test_show_tables.A (`A` UInt8) ENGINE = TinyLog -B 1 TinyLog CREATE TABLE test_show_tables.B (`A` UInt8) ENGINE = TinyLog +A 1 TinyLog CREATE TABLE default.A (`A` UInt8) ENGINE = TinyLog +B 1 TinyLog CREATE TABLE default.B (`A` UInt8) ENGINE = TinyLog test_temporary_table -['test_show_tables'] ['test_materialized'] +['default'] ['test_materialized'] 0 diff --git a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index 137dfb5b6f0..a58f9ddb0ac 100644 --- a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -1,32 +1,27 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_show_tables; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; -CREATE DATABASE test_show_tables; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; -CREATE TABLE test_show_tables.A (A UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_tables.B (A UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.B (A UInt8) ENGINE = TinyLog; -SHOW TABLES from test_show_tables; +SHOW TABLES from {CLICKHOUSE_DATABASE:Identifier}; SHOW TABLES in system where engine like '%System%' and name in ('numbers', 'one'); -SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = 'test_show_tables' ORDER BY name FORMAT TSVRaw; +SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = currentDatabase() ORDER BY name FORMAT TSVRaw; CREATE TEMPORARY TABLE test_temporary_table (id UInt64); SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary_table'; -CREATE TABLE test_show_tables.test_log(id UInt64) ENGINE = Log; -CREATE MATERIALIZED VIEW test_show_tables.test_materialized ENGINE = Log AS SELECT * FROM test_show_tables.test_log; -SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log'; - -DROP DATABASE test_show_tables; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_log(id UInt64) ENGINE = Log; +CREATE MATERIALIZED VIEW 
{CLICKHOUSE_DATABASE:Identifier}.test_materialized ENGINE = Log AS SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_log; +SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' and database=currentDatabase(); +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; -- Check that create_table_query works for system tables and unusual Databases -DROP DATABASE IF EXISTS test_DatabaseMemory; -CREATE DATABASE test_DatabaseMemory ENGINE = Memory; -CREATE TABLE test_DatabaseMemory.A (A UInt8) ENGINE = Null; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = Null; -SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables WHERE database = 'test_DatabaseMemory'; - -DROP DATABASE test_DatabaseMemory; +SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables WHERE database = '{CLICKHOUSE_DATABASE:String}'; diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql index 95d46032601..7d925bc4ff1 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00101_0; - -USE test_00101_0; DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_view; @@ -25,9 +20,9 @@ DROP TABLE test_view_filtered; -- Check only sophisticated constructors and desctructors: -CREATE DATABASE IF NOT EXISTS test_00101_1; +CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -USE test_00101_1; +USE {CLICKHOUSE_DATABASE_1:Identifier}; DROP TABLE IF EXISTS tmp; DROP TABLE IF EXISTS tmp_mv; @@ -57,5 +52,5 @@ EXISTS TABLE `.inner.tmp_mv4`; DROP TABLE tmp; -DROP DATABASE test_00101_0; -DROP DATABASE test_00101_1; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; diff --git a/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql b/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql index 23c157db875..1d988b38b05 100644 --- a/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql +++ b/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql @@ -1,11 +1,10 @@ --- Tags: no-parallel -CREATE DATABASE IF NOT EXISTS test2_00158; -DROP TABLE IF EXISTS test2_00158.mt_buffer_00158; -DROP TABLE IF EXISTS test2_00158.mt_00158; -CREATE TABLE test2_00158.mt_buffer_00158 (d Date DEFAULT today(), x UInt64) ENGINE = Buffer(test2_00158, mt_00158, 16, 100, 100, 1000000, 1000000, 1000000000, 1000000000); +CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_DATABASE:Identifier}; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_00158; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158 (d Date DEFAULT today(), x UInt64) ENGINE = Buffer({CLICKHOUSE_DATABASE:Identifier}, mt_00158, 16, 100, 100, 1000000, 1000000, 1000000000, 1000000000); SET send_logs_level = 'fatal'; -- Supress "Destination table test2.mt doesn't exist. Block of data is discarded." 
-INSERT INTO test2_00158.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 100000; -INSERT INTO test2_00158.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 1000000; -DROP TABLE IF EXISTS test2_00158.mt_buffer_00158; -DROP DATABASE test2_00158; +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 100000; +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 1000000; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference index cd4823e219f..ece1f5aa525 100644 --- a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference +++ b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference @@ -1,2 +1,4 @@ [(1,4),(2,5),(3,6)] [(1,4),(2,5),(3,6)] +[(1,4),(2,5),(3,6)] +[(1,4),(2,5),(3,6)] diff --git a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql index b28f4e65487..0189d3a63f5 100644 --- a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql +++ b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql @@ -1,3 +1,4 @@ -- Tags: shard SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) FROM remote('127.0.0.{2,3}', system.one) ORDER BY rand(); +SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) FROM remote('127.0.0.{2,3}') ORDER BY rand(); diff --git a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference index a594e1495c1..d3cd76be236 100644 --- a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference +++ b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference @@ -1,3 +1,6 @@ 1 1 +1 + +1 diff --git a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql index 80a35a4855a..a1e8d907a35 100644 --- a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql +++ b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql @@ -1,3 +1,4 @@ -- Tags: shard SELECT x FROM (SELECT count() AS x FROM remote('127.0.0.2', system.one) WITH TOTALS) LIMIT 1; +SELECT x FROM (SELECT count() AS x FROM remote('127.0.0.2') WITH TOTALS) LIMIT 1; diff --git a/tests/queries/0_stateless/00284_external_aggregation.sql b/tests/queries/0_stateless/00284_external_aggregation.sql index d19f9f5aee8..c1140faaa28 100644 --- a/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/tests/queries/0_stateless/00284_external_aggregation.sql @@ -13,13 +13,13 @@ SET group_by_two_level_threshold = 100000; SET max_bytes_before_external_group_by = '1Mi'; -- method: key_string & key_string_two_level -CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple(); +CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); SELECT s, count() FROM t_00284_str GROUP BY s 
ORDER BY s LIMIT 10 OFFSET 42; -- method: low_cardinality_key_string & low_cardinality_key_string_two_level -CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple(); +CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); SELECT s, count() FROM t_00284_lc_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 80053c99a17..2e2e1384534 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo === Test input_format_csv_empty_as_default +echo '=== Test input_format_csv_empty_as_default' $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS csv"; $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; @@ -19,7 +19,7 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test datetime +echo '=== Test datetime' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Asia/Istanbul'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" @@ -30,7 +30,7 @@ echo '"2016-01-01 01:02:03","1" $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test nullable datetime +echo '=== Test nullable datetime' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Asia/Istanbul')), s Nullable(String)) ENGINE = Memory"; echo 'NULL, NULL @@ -41,7 +41,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test ignore extra columns +echo '=== Test ignore extra columns' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; echo '"Hello", 1, "String1" @@ -55,7 +55,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test missing as default +echo '=== Test missing as default' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, f4 UInt64 Default 33, f5 Nullable(UInt64), f6 Nullable(UInt64) Default 55, f7 String DEFAULT 'Default') ENGINE = Memory"; echo ' diff --git a/tests/queries/0_stateless/00304_http_external_data.sh b/tests/queries/0_stateless/00304_http_external_data.sh index def17bc5cd1..4a097249cca 100755 --- a/tests/queries/0_stateless/00304_http_external_data.sh +++ b/tests/queries/0_stateless/00304_http_external_data.sh @@ -6,10 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String"; echo -ne '1@Hello\n2@World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String&format_csv_delimiter=@"; - -# use big-endian version of binary data for s390x -if [[ $(uname -a | grep s390x) ]]; then - echo -ne '\x00\x00\x00\x01\x00\x00\x00\x02' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" 
"${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary"; -else - echo -ne '\x01\x00\x00\x00\x02\x00\x00\x00' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary"; -fi +echo -ne '\x01\x00\x00\x00\x02\x00\x00\x00' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary"; diff --git a/tests/queries/0_stateless/00387_use_client_time_zone.sh b/tests/queries/0_stateless/00387_use_client_time_zone.sh index 2a6d81eebfe..e54d5244eef 100755 --- a/tests/queries/0_stateless/00387_use_client_time_zone.sh +++ b/tests/queries/0_stateless/00387_use_client_time_zone.sh @@ -5,4 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -env TZ=UTC ${CLICKHOUSE_CLIENT} --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)" +# NOTE: session_timezone overrides use_client_time_zone, disable it randomization +env TZ=UTC ${CLICKHOUSE_CLIENT} --session_timezone '' --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)" diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index 2b0cae3c1d4..6ec88fae351 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -10,7 +10,7 @@ result="" lines_expected=4 counter=0 while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do - result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' | sed 's/,\"elapsed_ns[^}]*//') let counter=counter+1 done echo "$result" @@ -19,7 +19,7 @@ result="" lines_expected=12 counter=0 while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do - result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'| sed 's/,\"elapsed_ns[^}]*//') let counter=counter+1 done echo "$result" @@ -46,7 +46,7 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREAT result="" counter=0 while [ $counter -lt $RETRIES ] && [ -z "$result" ]; do - result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]') + result=$(${CLICKHOUSE_CURL} -vsS 
"${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' | sed 's/,\"elapsed_ns[^}]*//') let counter=counter+1 done echo "$result" diff --git a/tests/queries/0_stateless/00417_kill_query.reference b/tests/queries/0_stateless/00417_kill_query.reference index 1a3b47964c0..0dd1d78ac82 100644 --- a/tests/queries/0_stateless/00417_kill_query.reference +++ b/tests/queries/0_stateless/00417_kill_query.reference @@ -1,2 +1,2 @@ -SELECT sleep(1) FROM system.numbers LIMIT 30 -SELECT sleep(1) FROM system.numbers LIMIT 31 +SELECT sleep(1) FROM system.numbers LIMIT 300 +SELECT sleep(1) FROM system.numbers LIMIT 301 diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index cd5b788a147..e64e93fd4a0 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -7,19 +7,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -QUERY_FIELND_NUM=4 +QUERY_FIELD_NUM=4 -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 30" &>/dev/null & -sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELND_NUM +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 300" &>/dev/null & -# 31 is for the query to be different from the previous one -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & -sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM +while true +do + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' AND query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELD_NUM | grep '.' && break + sleep 0.1 +done + +# 31 is for the query to be different from the previous one +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 301" &>/dev/null & + +while true +do + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' AND query = 'SELECT sleep(1) FROM system.numbers LIMIT 301' ASYNC" | cut -f $QUERY_FIELD_NUM | grep '.' 
&& break + sleep 0.1 +done $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 SYNC FORMAT TabSeparated" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 1 TEST" &>/dev/null - diff --git a/tests/queries/0_stateless/00419_show_sql_queries.sh b/tests/queries/0_stateless/00419_show_sql_queries.sh index 607703b385a..99252eeb1ba 100755 --- a/tests/queries/0_stateless/00419_show_sql_queries.sh +++ b/tests/queries/0_stateless/00419_show_sql_queries.sh @@ -8,3 +8,4 @@ $CLICKHOUSE_CLIENT -q "SHOW PROCESSLIST" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW DATABASES" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW TABLES" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW ENGINES" &>/dev/null +$CLICKHOUSE_CLIENT -q "SHOW FUNCTIONS" &>/dev/null diff --git a/tests/queries/0_stateless/00427_alter_primary_key.sh b/tests/queries/0_stateless/00427_alter_primary_key.sh index 1269e2ad6e3..f9984384d79 100755 --- a/tests/queries/0_stateless/00427_alter_primary_key.sh +++ b/tests/queries/0_stateless/00427_alter_primary_key.sh @@ -7,11 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function perform() { local query=$1 - TZ=UTC $CLICKHOUSE_CLIENT \ - --allow_deprecated_syntax_for_merge_tree=1 \ - --use_client_time_zone=1 \ - --input_format_values_interpret_expressions=0 \ - --query "$query" 2>/dev/null + local settings=( + --allow_deprecated_syntax_for_merge_tree 1 + --session_timezone UTC + --input_format_values_interpret_expressions 0 + ) + TZ=UTC $CLICKHOUSE_CLIENT "${settings[@]}" --query "$query" 2>/dev/null if [ "$?" -ne 0 ]; then echo "query failed" fi diff --git a/tests/queries/0_stateless/00502_custom_partitioning_local.sql b/tests/queries/0_stateless/00502_custom_partitioning_local.sql index 3d5f71429fe..a116f8bca24 100644 --- a/tests/queries/0_stateless/00502_custom_partitioning_local.sql +++ b/tests/queries/0_stateless/00502_custom_partitioning_local.sql @@ -18,7 +18,7 @@ ALTER TABLE not_partitioned DETACH PARTITION ID 'all'; SELECT 'Sum after DETACH PARTITION:'; SELECT sum(x) FROM not_partitioned; SELECT 'system.detached_parts after DETACH PARTITION:'; -SELECT system.detached_parts.* EXCEPT (bytes_on_disk, `path`, disk) FROM system.detached_parts WHERE database = currentDatabase() AND table = 'not_partitioned'; +SELECT system.detached_parts.* EXCEPT (bytes_on_disk, `path`, disk, modification_time) FROM system.detached_parts WHERE database = currentDatabase() AND table = 'not_partitioned'; DROP TABLE not_partitioned; diff --git a/tests/queries/0_stateless/00508_materialized_view_to.sql b/tests/queries/0_stateless/00508_materialized_view_to.sql index 522ceb40404..0d8fb85eeeb 100644 --- a/tests/queries/0_stateless/00508_materialized_view_to.sql +++ b/tests/queries/0_stateless/00508_materialized_view_to.sql @@ -1,9 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS test_00508; -CREATE DATABASE test_00508; - -USE test_00508; CREATE TABLE src (x UInt8) ENGINE = Null; CREATE TABLE dst (x UInt8) ENGINE = Memory; @@ -20,17 +14,17 @@ SELECT * FROM dst ORDER BY x; USE default; -- Reattach MV (shortcut) -ATTACH TABLE test_00508.mv_00508; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.mv_00508; -INSERT INTO test_00508.src VALUES (3); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.src VALUES (3); -SELECT * FROM test_00508.mv_00508 ORDER BY x; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.mv_00508 ORDER BY x; -- Drop the MV and see if the data is still readable -DROP TABLE test_00508.mv_00508; -SELECT * FROM test_00508.dst 
ORDER BY x; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.mv_00508; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.dst ORDER BY x; -DROP TABLE test_00508.src; -DROP TABLE test_00508.dst; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.src; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.dst; -DROP DATABASE test_00508; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00514_interval_operators.reference b/tests/queries/0_stateless/00514_interval_operators.reference index 43238eecb3d..b420e1679e0 100644 --- a/tests/queries/0_stateless/00514_interval_operators.reference +++ b/tests/queries/0_stateless/00514_interval_operators.reference @@ -37,3 +37,15 @@ 2030-02-28 01:02:03 2017-04-29 01:02:03 2031-02-28 01:02:03 2017-05-29 01:02:03 2015-11-29 01:02:03 +2000-01-15 18:56:07 DateTime +2000-01-01 12:20:34.567 DateTime64(3) +2000-01-01 12:00:01.234567 DateTime64(6) +2000-01-01 12:00:00.001234567 DateTime64(9) +1999-12-18 05:03:53 DateTime +2000-01-01 11:39:25.433 DateTime64(3) +2000-01-01 11:59:58.765433 DateTime64(6) +2000-01-01 11:59:59.998765433 DateTime64(9) +2000-01-01 11:59:48.333 DateTime64(3) +2000-01-01 11:59:48.33398 DateTime64(5) +2000-01-01 11:59:48.325 DateTime64(3) +2299-12-31 12:00:00.000000 diff --git a/tests/queries/0_stateless/00514_interval_operators.sql b/tests/queries/0_stateless/00514_interval_operators.sql index a4b6c983abf..f9f3abbdb54 100644 --- a/tests/queries/0_stateless/00514_interval_operators.sql +++ b/tests/queries/0_stateless/00514_interval_operators.sql @@ -1,5 +1,27 @@ +SET session_timezone = 'Etc/UTC'; + SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 DAY + INTERVAL 1 MONTH - INTERVAL 1 YEAR; SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 HOUR + INTERVAL 1000 MINUTE + INTERVAL 10 SECOND; SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 DAY + INTERVAL number MONTH FROM system.numbers LIMIT 20; SELECT toDateTime('2016-02-29 01:02:03') + INTERVAL number YEAR, toDateTime('2016-02-29 01:02:03') + INTERVAL number MONTH FROM system.numbers LIMIT 16; SELECT toDateTime('2016-02-29 01:02:03') - INTERVAL 1 QUARTER; + +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 SECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 MICROSECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 NANOSECOND) x, toTypeName(x); + +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 SECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 MICROSECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 NANOSECOND) x, toTypeName(x); + +SELECT (toDateTime64('2000-01-01 12:00:00.678', 3) - INTERVAL 12345 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime64('2000-01-01 12:00:00.67898', 5) - INTERVAL 12345 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime64('2000-01-01 12:00:00.67', 2) - INTERVAL 12345 MILLISECOND) x, toTypeName(x); + +select toDateTime64('3000-01-01 12:00:00.12345', 0) + interval 0 nanosecond; -- { serverError 407 } +select toDateTime64('3000-01-01 12:00:00.12345', 0) + interval 0 microsecond; + +-- Check that the error is thrown during typechecking, not execution. 
+select materialize(toDate('2000-01-01')) + interval 1 nanosecond from numbers(0); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00522_multidimensional.sql b/tests/queries/0_stateless/00522_multidimensional.sql index c3c41257ab9..ea9881c612a 100644 --- a/tests/queries/0_stateless/00522_multidimensional.sql +++ b/tests/queries/0_stateless/00522_multidimensional.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS multidimensional; -CREATE TABLE multidimensional ENGINE = MergeTree ORDER BY number AS SELECT number, arrayMap(x -> (x, [x], [[x]], (x, toString(x))), arrayMap(x -> range(x), range(number % 10))) AS value FROM system.numbers LIMIT 100000; +CREATE TABLE multidimensional ENGINE = MergeTree ORDER BY number SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT number, arrayMap(x -> (x, [x], [[x]], (x, toString(x))), arrayMap(x -> range(x), range(number % 10))) AS value FROM system.numbers LIMIT 100000; SELECT sum(cityHash64(toString(value))) FROM multidimensional; diff --git a/tests/queries/0_stateless/00534_functions_bad_arguments.lib b/tests/queries/0_stateless/00534_functions_bad_arguments.lib index 31df60cb362..ae4fc482bd6 100644 --- a/tests/queries/0_stateless/00534_functions_bad_arguments.lib +++ b/tests/queries/0_stateless/00534_functions_bad_arguments.lib @@ -4,3 +4,5 @@ function test_variant { perl -E "say \$_ for map {chomp; (qq{$1})} qx{$CLICKHOUSE_CLIENT -q 'SELECT name FROM system.functions ORDER BY name;'}" | $CLICKHOUSE_CLIENT --calculate_text_stack_trace=0 -n --ignore-error >/dev/null 2>&1 $CLICKHOUSE_CLIENT -q "SELECT 'Still alive'" } + +# vi: ft=bash diff --git a/tests/queries/0_stateless/00538_datediff_plural_units.reference b/tests/queries/0_stateless/00538_datediff_plural_units.reference new file mode 100644 index 00000000000..ebe63974df8 --- /dev/null +++ b/tests/queries/0_stateless/00538_datediff_plural_units.reference @@ -0,0 +1,10 @@ +-1 +-7 +-23 +-104 +-730 +-17520 +-1051200 +-63072000 +-63072000000 +-63072000000000 diff --git a/tests/queries/0_stateless/00538_datediff_plural_units.sql b/tests/queries/0_stateless/00538_datediff_plural_units.sql new file mode 100644 index 00000000000..d1234155a56 --- /dev/null +++ b/tests/queries/0_stateless/00538_datediff_plural_units.sql @@ -0,0 +1,10 @@ +SELECT dateDiff('years', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT dateDiff('quarters', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT dateDiff('months', toDateTime('2017-12-31'), toDateTime('2016-01-01')); +SELECT dateDiff('weeks', toDateTime('2017-12-31'), toDateTime('2016-01-01')); +SELECT dateDiff('days', toDateTime('2017-12-31'), toDateTime('2016-01-01')); +SELECT dateDiff('hours', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('minutes', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('seconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('milliseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('microseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); diff --git a/tests/queries/0_stateless/00556_array_intersect.reference b/tests/queries/0_stateless/00556_array_intersect.reference index fad9b2f507d..b9841ef42c4 100644 --- a/tests/queries/0_stateless/00556_array_intersect.reference +++ b/tests/queries/0_stateless/00556_array_intersect.reference @@ -5,7 +5,7 @@ [1] [1] [1] -[NULL,1] +[1,NULL] [1] [1] [[1,1]] diff --git 
a/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql b/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql index 46fc0dd586d..d24a57187b4 100644 --- a/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql +++ b/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql @@ -1,32 +1,25 @@ --- Tags: no-parallel -CREATE DATABASE test_00571; - -USE test_00571; - -DROP DATABASE IF EXISTS none; -DROP TABLE IF EXISTS test_00571; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; DROP TABLE IF EXISTS test_materialized_00571; set allow_deprecated_syntax_for_merge_tree=1; -CREATE DATABASE none; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; CREATE TABLE test_00571 ( date Date, platform Enum8('a' = 0, 'b' = 1, 'c' = 2), app Enum8('a' = 0, 'b' = 1) ) ENGINE = MergeTree(date, (platform, app), 8192); CREATE MATERIALIZED VIEW test_materialized_00571 ENGINE = MergeTree(date, (platform, app), 8192) POPULATE AS SELECT date, platform, app FROM (SELECT * FROM test_00571); -USE none; +USE {CLICKHOUSE_DATABASE_1:Identifier}; -INSERT INTO test_00571.test_00571 VALUES('2018-02-16', 'a', 'a'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.test_00571 VALUES('2018-02-16', 'a', 'a'); -SELECT * FROM test_00571.test_00571; -SELECT * FROM test_00571.test_materialized_00571; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_00571; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; -DETACH TABLE test_00571.test_materialized_00571; -ATTACH TABLE test_00571.test_materialized_00571; +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; -SELECT * FROM test_00571.test_materialized_00571; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; -DROP DATABASE IF EXISTS none; -DROP TABLE IF EXISTS test_00571.test_00571; -DROP TABLE IF EXISTS test_00571.test_materialized_00571; - -DROP DATABASE test_00571; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_00571; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; diff --git a/tests/queries/0_stateless/00576_nested_and_prewhere.sql b/tests/queries/0_stateless/00576_nested_and_prewhere.sql index b15af582a19..5916e679f1e 100644 --- a/tests/queries/0_stateless/00576_nested_and_prewhere.sql +++ b/tests/queries/0_stateless/00576_nested_and_prewhere.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS nested; -CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeTree ORDER BY x; +CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO nested SELECT number, number % 2, range(number % 10) FROM system.numbers LIMIT 100000; ALTER TABLE nested ADD COLUMN n.b Array(UInt64); diff --git a/tests/queries/0_stateless/00604_show_create_database.reference b/tests/queries/0_stateless/00604_show_create_database.reference index c05b088280e..52fd2c48df1 100644 --- a/tests/queries/0_stateless/00604_show_create_database.reference +++ b/tests/queries/0_stateless/00604_show_create_database.reference @@ -1 +1 @@ -CREATE DATABASE test_00604\nENGINE = Atomic +CREATE DATABASE default\nENGINE = Atomic diff --git a/tests/queries/0_stateless/00604_show_create_database.sql 
b/tests/queries/0_stateless/00604_show_create_database.sql index c990e7abed3..23ebd23c9f1 100644 --- a/tests/queries/0_stateless/00604_show_create_database.sql +++ b/tests/queries/0_stateless/00604_show_create_database.sql @@ -1,5 +1,3 @@ --- Tags: no-ordinary-database, no-parallel +-- Tags: no-ordinary-database, no-replicated-database -create database if not exists test_00604; -show create database test_00604; -drop database test_00604; +show create database {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00612_count.sql b/tests/queries/0_stateless/00612_count.sql index 5dd9c770700..9c435bd97fe 100644 --- a/tests/queries/0_stateless/00612_count.sql +++ b/tests/queries/0_stateless/00612_count.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS count; -CREATE TABLE count (x UInt64) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE count (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO count SELECT * FROM numbers(1234567); SELECT count() FROM count; diff --git a/tests/queries/0_stateless/00612_http_max_query_size.sh b/tests/queries/0_stateless/00612_http_max_query_size.sh index cfcae330b85..6289470c21e 100755 --- a/tests/queries/0_stateless/00612_http_max_query_size.sh +++ b/tests/queries/0_stateless/00612_http_max_query_size.sh @@ -36,7 +36,7 @@ def gen_data(q): pattern = ''' or toString(number) = '{}'\n''' - for i in range(1, 4 * 1024): + for i in range(0, 1024 * 2): yield pattern.format(str(i).zfill(1024 - len(pattern) + 2)).encode() s = requests.Session() diff --git a/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql b/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql index 1802fadc57b..462fd0fef61 100644 --- a/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql +++ b/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS data_00612; DROP TABLE IF EXISTS dist_00612; diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 399511db701..334025cba28 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage, no-upgrade-check +# Tags: zookeeper, no-s3-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) @@ -11,26 +11,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - local query="$1" && shift - - local retry=0 - until [ $retry -ge 5 ] - do - local result - result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)" - if [ "$?" 
== 0 ]; then - echo -n "$result" - return - else - retry=$((retry + 1)) - sleep 3 - fi - done - echo "Query '$query' failed with '$result'" -} - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" diff --git a/tests/queries/0_stateless/00678_murmurhash.reference b/tests/queries/0_stateless/00678_murmurhash.reference index fb4a00ba046..988c022f1bf 100644 --- a/tests/queries/0_stateless/00678_murmurhash.reference +++ b/tests/queries/0_stateless/00678_murmurhash.reference @@ -25,5 +25,5 @@ 9631199822919835226 4334672815104069193 4334672815104069193 -1 -1 +6145F501578671E2877DBA2BE487AF7E +16FE7483905CCE7A85670E43E4678877 diff --git a/tests/queries/0_stateless/00678_murmurhash.sql b/tests/queries/0_stateless/00678_murmurhash.sql index eda29fd17cd..705c62480a0 100644 --- a/tests/queries/0_stateless/00678_murmurhash.sql +++ b/tests/queries/0_stateless/00678_murmurhash.sql @@ -32,7 +32,5 @@ SELECT gccMurmurHash('foo'); SELECT gccMurmurHash('\x01'); SELECT gccMurmurHash(1); --- Comparison with reverse for big endian -SELECT hex(murmurHash3_128('foo')) = hex(reverse(unhex('6145F501578671E2877DBA2BE487AF7E'))) or hex(murmurHash3_128('foo')) = '6145F501578671E2877DBA2BE487AF7E'; --- Comparison with reverse for big endian -SELECT hex(murmurHash3_128('\x01')) = hex(reverse(unhex('16FE7483905CCE7A85670E43E4678877'))) or hex(murmurHash3_128('\x01')) = '16FE7483905CCE7A85670E43E4678877'; +SELECT hex(murmurHash3_128('foo')); +SELECT hex(murmurHash3_128('\x01')); diff --git a/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql b/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql index 5a169403872..c4613acf5f3 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql @@ -1,6 +1,5 @@ drop table if exists lc_dict_reading; -create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val; +create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000; select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4; drop table if exists lc_dict_reading; - diff --git a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql index 3c0e64a9637..b4fe4b29200 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql @@ -8,8 +8,8 @@ select 'MergeTree'; drop table if exists lc_small_dict; drop table if exists lc_big_dict; -create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str; -create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str; +create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_small_dict select 
toString(number % 1000) from system.numbers limit 1000000; insert into lc_big_dict select toString(number) from system.numbers limit 1000000; @@ -25,4 +25,3 @@ select sum(toUInt64OrZero(str)) from lc_big_dict; drop table if exists lc_small_dict; drop table if exists lc_big_dict; - diff --git a/tests/queries/0_stateless/00700_decimal_compare.sql b/tests/queries/0_stateless/00700_decimal_compare.sql index 41ff8b38102..7740c75f859 100644 --- a/tests/queries/0_stateless/00700_decimal_compare.sql +++ b/tests/queries/0_stateless/00700_decimal_compare.sql @@ -27,6 +27,7 @@ SELECT a > 0, b > 0, g > 0 FROM decimal ORDER BY a DESC; SELECT a, g > toInt8(0), g > toInt16(0), g > toInt32(0), g > toInt64(0) FROM decimal ORDER BY a; SELECT a, g > toUInt8(0), g > toUInt16(0), g > toUInt32(0), g > toUInt64(0) FROM decimal ORDER BY a; SELECT a, b, g FROM decimal WHERE a IN(42) AND b IN(42) AND g IN(42); +SELECT a, b, g FROM decimal WHERE a IN(42) AND b IN(42) AND g IN(42) SETTINGS allow_experimental_analyzer = 1; SELECT a, b, g FROM decimal WHERE a > 0 AND a <= 42 AND b <= 42 AND g <= 42; SELECT d, e, f from decimal WHERE d > 0 AND d < 1 AND e > 0 AND e < 1 AND f > 0 AND f < 1; diff --git a/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference new file mode 100644 index 00000000000..e4db0ebf5ef --- /dev/null +++ b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference @@ -0,0 +1,4 @@ +Decimal(10, 0) +Decimal(18, 0) +Decimal(9, 8) +Decimal(18, 0) Decimal(10, 0) diff --git a/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql new file mode 100644 index 00000000000..5132b593bcc --- /dev/null +++ b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS decimal; + +CREATE TABLE IF NOT EXISTS decimal +( + d1 DECIMAL(9, 8), + d2 DECIMAL(18), + d3 DECIMAL +) +ENGINE = MergeTree +PARTITION BY toInt32(d1) +ORDER BY (d2, d3); + +INSERT INTO decimal (d1, d2, d3) VALUES (4.2, 4.2, 4.2); + +SELECT type FROM system.columns WHERE table = 'decimal' AND database = currentDatabase() ORDER BY type; + +SELECT toTypeName(d2), toTypeName(d3) FROM decimal LIMIT 1; + +DROP TABLE decimal; diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index 31ea1dbbe58..004590c21df 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -11,7 +11,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" function query() { - for _ in {1..100}; do + for _ in {1..50}; do ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" done diff --git a/tests/queries/0_stateless/00719_parallel_ddl_table.sh b/tests/queries/0_stateless/00719_parallel_ddl_table.sh index fdc994aec33..57a7e228341 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_table.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_table.sh @@ -10,7 +10,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS parallel_ddl" function query() { - for _ in {1..100}; do + for _ in {1..50}; do ${CLICKHOUSE_CLIENT} --query "CREATE TABLE IF NOT EXISTS parallel_ddl(a Int) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS parallel_ddl" done diff 
--git a/tests/queries/0_stateless/00725_comment_columns_long.sql b/tests/queries/0_stateless/00725_comment_columns_long.sql index fe4ee48909f..139f8ba006f 100644 --- a/tests/queries/0_stateless/00725_comment_columns_long.sql +++ b/tests/queries/0_stateless/00725_comment_columns_long.sql @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-replicated-database +-- Tag no-replicated-database: Unsupported type of ALTER query DROP TABLE IF EXISTS check_query_comment_column; diff --git a/tests/queries/0_stateless/00732_base64_functions.reference b/tests/queries/0_stateless/00732_base64_functions.reference index 5dc1ba03b89..f97c19427e7 100644 --- a/tests/queries/0_stateless/00732_base64_functions.reference +++ b/tests/queries/0_stateless/00732_base64_functions.reference @@ -12,7 +12,18 @@ foo foob fooba foobar -1 1 +f +fo +foo +foob +fooba +foobar +1 1 +1 1 +fooba +~ Zm9v foo +foo +TEcgT3B0aW11cw== diff --git a/tests/queries/0_stateless/00732_base64_functions.sql b/tests/queries/0_stateless/00732_base64_functions.sql index adba0cdebbd..99268004003 100644 --- a/tests/queries/0_stateless/00732_base64_functions.sql +++ b/tests/queries/0_stateless/00732_base64_functions.sql @@ -5,15 +5,24 @@ SET send_logs_level = 'fatal'; SELECT base64Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); SELECT base64Decode(val) FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); -SELECT base64Decode(base64Encode('foo')) = 'foo', base64Encode(base64Decode('Zm9v')) == 'Zm9v'; +SELECT tryBase64Decode(val) FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); +SELECT base64Decode(base64Encode('foo')) = 'foo', base64Encode(base64Decode('Zm9v')) == 'Zm9v'; +SELECT tryBase64Decode(base64Encode('foo')) = 'foo', base64Encode(tryBase64Decode('Zm9v')) == 'Zm9v'; + +SELECT base64Encode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64Decode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64Decode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError INCORRECT_DATA } SELECT tryBase64Decode('Zm9vYmF=Zm9v'); -SELECT base64Encode(val, 'excess argument') FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); -- { serverError 42 } -SELECT base64Decode(val, 'excess argument') FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); -- { serverError 42 } -SELECT tryBase64Decode('Zm9vYmF=Zm9v', 'excess argument'); -- { serverError 42 } - -SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 } +SELECT base64Decode('foo'); -- { serverError INCORRECT_DATA } +SELECT tryBase64Decode('foo'); select base64Encode(toFixedString('foo', 3)); select base64Decode(toFixedString('Zm9v', 4)); +select tryBase64Decode(toFixedString('Zm9v', 4)); + +-- This query reproduces a bug in TurboBase64 library (which we no longer use) +select distinct base64Encode(materialize('LG Optimus')) from numbers(100); diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql index 9c02ac795ed..a1859220c6c 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql +++ 
b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql @@ -1,4 +1,4 @@ --- Tags: long, zookeeper, no-replicated-database, no-upgrade-check +-- Tags: long, zookeeper, no-replicated-database -- Tag no-replicated-database: Fails due to additional replicas or shards SET send_logs_level = 'fatal'; diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh index 9a7ae92439d..b62a639d8f4 100755 --- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -13,7 +13,7 @@ uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT reinterpretAsUUID(currentDatabase()) echo "DROP TABLE IF EXISTS tab_00738 SYNC; DROP TABLE IF EXISTS mv SYNC; -CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a; +CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- The matview will take at least 2 seconds to be finished (10000000 * 0.0000002) CREATE MATERIALIZED VIEW mv UUID '$uuid' ENGINE = Log AS SELECT sleepEachRow(0.0000002) FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n @@ -63,4 +63,3 @@ drop_inner_id wait drop_at_exit - diff --git a/tests/queries/0_stateless/00740_database_in_nested_view.sql b/tests/queries/0_stateless/00740_database_in_nested_view.sql index e2debe2859b..42c26a709b1 100644 --- a/tests/queries/0_stateless/00740_database_in_nested_view.sql +++ b/tests/queries/0_stateless/00740_database_in_nested_view.sql @@ -1,10 +1,5 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_00740; -CREATE DATABASE test_00740; -USE test_00740; - -DROP TABLE IF EXISTS test_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; DROP TABLE IF EXISTS test_view_00740; DROP TABLE IF EXISTS test_nested_view_00740; DROP TABLE IF EXISTS test_joined_view_00740; @@ -19,12 +14,10 @@ SELECT * FROM test_nested_view_00740; SELECT * FROM test_joined_view_00740; USE default; -SELECT * FROM test_00740.test_view_00740; -SELECT * FROM test_00740.test_nested_view_00740; -SELECT * FROM test_00740.test_joined_view_00740; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_view_00740; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_nested_view_00740; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_joined_view_00740; -DROP TABLE IF EXISTS test_00740.test_00740; -DROP TABLE IF EXISTS test_00740.test_view_00740; -DROP TABLE IF EXISTS test_00740.test_nested_view_00740; - -DROP DATABASE test_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_view_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_nested_view_00740; diff --git a/tests/queries/0_stateless/00746_hashing_tuples.reference b/tests/queries/0_stateless/00746_hashing_tuples.reference index e3b896f6077..71d45be5a54 100644 --- a/tests/queries/0_stateless/00746_hashing_tuples.reference +++ b/tests/queries/0_stateless/00746_hashing_tuples.reference @@ -1,11 +1,11 @@ 12940785793559895259 17926972817233444501 7456555839952096623 -1 -1 -1 -1 -1 +CC45107CC4B79F62D831BEF2103C7CBF +DF2EC2F0669B000EDFF6ADEE264E7D68 +4CD1C30C38AB935D418B5269EF197B9E +9D78134EE48654D753CCA1B76185CF8E +389D16428D2AADEC9713905572F42864 955237314186186656 8175794665478042155 9325786087413524176 @@ -18,8 +18,8 @@ 8163029322371165472 8788309436660676487 236561483980029756 -1 -1 +8DD5527CC43D76F4760D26BE0F641F7E +F8F7AD9B6CD4CF117A71E277E2EC2931 12384823029245979431 
4507350192761038840 1188926775431157506 diff --git a/tests/queries/0_stateless/00746_hashing_tuples.sql b/tests/queries/0_stateless/00746_hashing_tuples.sql index f17ad6fa77f..466a2184c65 100644 --- a/tests/queries/0_stateless/00746_hashing_tuples.sql +++ b/tests/queries/0_stateless/00746_hashing_tuples.sql @@ -4,11 +4,11 @@ SELECT sipHash64(1, 2, 3); SELECT sipHash64(1, 3, 2); SELECT sipHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); -SELECT hex(sipHash128('foo')) = hex(reverse(unhex('CC45107CC4B79F62D831BEF2103C7CBF'))) or hex(sipHash128('foo')) = 'CC45107CC4B79F62D831BEF2103C7CBF'; -SELECT hex(sipHash128('\x01')) = hex(reverse(unhex('DF2EC2F0669B000EDFF6ADEE264E7D68'))) or hex(sipHash128('\x01')) = 'DF2EC2F0669B000EDFF6ADEE264E7D68'; -SELECT hex(sipHash128('foo', 'foo')) = hex(reverse(unhex('4CD1C30C38AB935D418B5269EF197B9E'))) or hex(sipHash128('foo', 'foo')) = '4CD1C30C38AB935D418B5269EF197B9E'; -SELECT hex(sipHash128('foo', 'foo', 'foo')) = hex(reverse(unhex('9D78134EE48654D753CCA1B76185CF8E'))) or hex(sipHash128('foo', 'foo', 'foo')) = '9D78134EE48654D753CCA1B76185CF8E'; -SELECT hex(sipHash128(1, 2, 3)) = hex(reverse(unhex('389D16428D2AADEC9713905572F42864'))) or hex(sipHash128(1, 2, 3)) = '389D16428D2AADEC9713905572F42864'; +SELECT hex(sipHash128('foo')); +SELECT hex(sipHash128('\x01')); +SELECT hex(sipHash128('foo', 'foo')); +SELECT hex(sipHash128('foo', 'foo', 'foo')); +SELECT hex(sipHash128(1, 2, 3)); SELECT halfMD5(1, 2, 3); SELECT halfMD5(1, 3, 2); @@ -26,8 +26,8 @@ SELECT murmurHash3_64(1, 2, 3); SELECT murmurHash3_64(1, 3, 2); SELECT murmurHash3_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); -SELECT hex(murmurHash3_128('foo', 'foo')) = hex(reverse(unhex('8DD5527CC43D76F4760D26BE0F641F7E'))) or hex(murmurHash3_128('foo', 'foo')) = '8DD5527CC43D76F4760D26BE0F641F7E'; -SELECT hex(murmurHash3_128('foo', 'foo', 'foo')) = hex(reverse(unhex('F8F7AD9B6CD4CF117A71E277E2EC2931'))) or hex(murmurHash3_128('foo', 'foo', 'foo')) = 'F8F7AD9B6CD4CF117A71E277E2EC2931'; +SELECT hex(murmurHash3_128('foo', 'foo')); +SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); SELECT gccMurmurHash(1, 2, 3); SELECT gccMurmurHash(1, 3, 2); diff --git a/tests/queries/0_stateless/00748_insert_array_with_null.sql b/tests/queries/0_stateless/00748_insert_array_with_null.sql index ca36352c2cf..ac55d4e9d8c 100644 --- a/tests/queries/0_stateless/00748_insert_array_with_null.sql +++ b/tests/queries/0_stateless/00748_insert_array_with_null.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS arraytest; set allow_deprecated_syntax_for_merge_tree=1; +set input_format_null_as_default=0; CREATE TABLE arraytest ( created_date Date DEFAULT toDate(created_at), created_at DateTime DEFAULT now(), strings Array(String) DEFAULT emptyArrayString()) ENGINE = MergeTree(created_date, cityHash64(created_at), (created_date, cityHash64(created_at)), 8192); INSERT INTO arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', 'ccccc']); diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/tests/queries/0_stateless/00751_default_databasename_for_view.reference index b3f1875ae91..4899e230924 100644 --- a/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test_00751.t_mv_00751 +CREATE MATERIALIZED VIEW default.t_mv_00751 ( `date` Date, `platform` Enum8('a' = 0, 'b' = 1), @@ -11,14 +11,14 @@ SELECT date, platform, app -FROM test_00751.t_00751 +FROM 
default.t_00751 WHERE (app = ( SELECT min(app) - FROM test_00751.u_00751 + FROM default.u_00751 )) AND (platform = ( SELECT ( SELECT min(platform) - FROM test_00751.v_00751 + FROM default.v_00751 ) )) 2000-01-01 a a diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.sql b/tests/queries/0_stateless/00751_default_databasename_for_view.sql index 9292b612d87..599ef5b89ae 100644 --- a/tests/queries/0_stateless/00751_default_databasename_for_view.sql +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.sql @@ -1,7 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00751; -USE test_00751; DROP TABLE IF EXISTS t_00751; DROP TABLE IF EXISTS t_mv_00751; @@ -25,25 +21,24 @@ CREATE MATERIALIZED VIEW t_mv_00751 ENGINE = MergeTree ORDER BY date AS SELECT date, platform, app FROM t_00751 WHERE app = (SELECT min(app) from u_00751) AND platform = (SELECT (SELECT min(platform) from v_00751)); -SHOW CREATE TABLE test_00751.t_mv_00751 FORMAT TabSeparatedRaw; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751 FORMAT TabSeparatedRaw; USE default; -DETACH TABLE test_00751.t_mv_00751; -ATTACH TABLE test_00751.t_mv_00751; +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751; -INSERT INTO test_00751.t_00751 VALUES ('2000-01-01', 'a', 'a') ('2000-01-02', 'b', 'b'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.t_00751 VALUES ('2000-01-01', 'a', 'a') ('2000-01-02', 'b', 'b'); -INSERT INTO test_00751.u_00751 VALUES ('a'); -INSERT INTO test_00751.v_00751 VALUES ('a'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.u_00751 VALUES ('a'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.v_00751 VALUES ('a'); -INSERT INTO test_00751.t_00751 VALUES ('2000-01-03', 'a', 'a') ('2000-01-04', 'b', 'b'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.t_00751 VALUES ('2000-01-03', 'a', 'a') ('2000-01-04', 'b', 'b'); -SELECT * FROM test_00751.t_00751 ORDER BY date; -SELECT * FROM test_00751.t_mv_00751 ORDER BY date; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.t_00751 ORDER BY date; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751 ORDER BY date; -DROP TABLE test_00751.t_00751; -DROP TABLE test_00751.t_mv_00751; -DROP TABLE test_00751.u_00751; -DROP TABLE test_00751.v_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.t_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.u_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.v_00751; -DROP DATABASE test_00751; diff --git a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.reference b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.reference index aa509893230..add8c239ade 100644 --- a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.reference +++ b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.reference @@ -1,7 +1,7 @@ OK OK 1 -FAIL +OK 0 4 2 @@ -9,8 +9,8 @@ FAIL 1 1 4 -FAIL -FAIL -FAIL -FAIL -FAIL +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.reference b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.reference index 611f0fd2585..4c66ccfd2a2 100644 --- a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.reference +++ 
b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.reference @@ -1,15 +1,15 @@ OK OK 1 -FAIL +OK 0 1 4 4 2 4 -FAIL -FAIL -FAIL -FAIL -FAIL +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/00800_versatile_storage_join.sql b/tests/queries/0_stateless/00800_versatile_storage_join.sql index 3690ea6bf89..cccc655e34b 100644 --- a/tests/queries/0_stateless/00800_versatile_storage_join.sql +++ b/tests/queries/0_stateless/00800_versatile_storage_join.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00800; - -USE test_00800; DROP TABLE IF EXISTS join_any_inner; DROP TABLE IF EXISTS join_any_left; @@ -54,24 +49,22 @@ SELECT joinGet('join_string_key', 'x', 'abc'), joinGet('join_string_key', 'k', ' USE default; -DROP TABLE test_00800.join_any_inner; -DROP TABLE test_00800.join_any_left; -DROP TABLE test_00800.join_any_left_null; -DROP TABLE test_00800.join_all_inner; -DROP TABLE test_00800.join_all_left; -DROP TABLE test_00800.join_string_key; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_any_inner; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_any_left; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_any_left_null; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_all_inner; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_all_left; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_string_key; -- test provided by Alexander Zaitsev -DROP TABLE IF EXISTS test_00800.join_test; -CREATE TABLE test_00800.join_test (a UInt8, b UInt8) Engine = Join(ANY, LEFT, a); +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.join_test; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.join_test (a UInt8, b UInt8) Engine = Join(ANY, LEFT, a); -USE test_00800; +USE {CLICKHOUSE_DATABASE:Identifier}; select joinGet('join_test', 'b', 1); USE system; -SELECT joinGet('test_00800.join_test', 'b', 1); +SELECT joinGet({CLICKHOUSE_DATABASE:String} || '.join_test', 'b', 1); USE default; -DROP TABLE test_00800.join_test; - -DROP DATABASE test_00800; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_test; diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index 8a256567e80..d8c28a7d9d7 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -1,6 +1,6 @@ ---Tags: no-fasttest, no-cpu-aarch64 +--Tags: no-fasttest, no-cpu-aarch64, no-cpu-s390x -- no-fasttest because DEFLATE_QPL isn't available in fasttest --- no-cpu-aarch64 because DEFLATE_QPL is x86-only +-- no-cpu-aarch64 and no-cpu-s390x because DEFLATE_QPL is x86-only -- A bunch of random DDLs to test the DEFLATE_QPL codec. 
diff --git a/tests/queries/0_stateless/00815_left_join_on_stepanel.sql b/tests/queries/0_stateless/00815_left_join_on_stepanel.sql index 13172f8c18d..725c9523cd7 100644 --- a/tests/queries/0_stateless/00815_left_join_on_stepanel.sql +++ b/tests/queries/0_stateless/00815_left_join_on_stepanel.sql @@ -1,7 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00815; -USE test_00815; DROP TABLE IF EXISTS fact_cpc_clicks; DROP TABLE IF EXISTS dim_model; @@ -16,6 +12,6 @@ select f.model_id from fact_cpc_clicks as f left join dim_model as d on f.model_ USE default; -select f.model_id from test_00815.fact_cpc_clicks as f left join test_00815.dim_model as d on f.model_id=d.model_id limit 10; +select f.model_id from {CLICKHOUSE_DATABASE:Identifier}.fact_cpc_clicks as f left join {CLICKHOUSE_DATABASE:Identifier}.dim_model as d on f.model_id=d.model_id limit 10; -DROP DATABASE test_00815; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh index 6714d8b35ca..cbe37de6651 100755 --- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh +++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-parallel +# Tags: deadlock, no-parallel, no-debug # NOTE: database = $CLICKHOUSE_DATABASE is unwanted diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql index e7f034131a2..2044a9b8d22 100644 --- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql +++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql @@ -1,7 +1,4 @@ --- Tags: global, no-parallel -CREATE DATABASE IF NOT EXISTS test_00857; -USE test_00857; DROP TABLE IF EXISTS local_table; DROP TABLE IF EXISTS other_table; @@ -27,7 +24,7 @@ INSERT INTO other_table VALUES(100, 'One Hundred', now(), 1000); INSERT INTO other_table VALUES(200, 'Two Hundred', now(), 2000); select t2.name from remote('127.0.0.2', currentDatabase(), 'local_table') as t1 -left join test_00857.other_table as t2 -- FIXME: doesn't work properly on remote without explicit database prefix +left join {CLICKHOUSE_DATABASE:Identifier}.other_table as t2 -- FIXME: doesn't work properly on remote without explicit database prefix on t1.oth_id = t2.id order by t2.name; @@ -58,4 +55,3 @@ order by other_table.name; DROP TABLE local_table; DROP TABLE other_table; -DROP DATABASE test_00857; diff --git a/tests/queries/0_stateless/00900_long_parquet_load.sh b/tests/queries/0_stateless/00900_long_parquet_load.sh index 8142c5b5810..2f19cce9107 100755 --- a/tests/queries/0_stateless/00900_long_parquet_load.sh +++ b/tests/queries/0_stateless/00900_long_parquet_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest +# Tags: long, no-fasttest, no-debug # # Load all possible .parquet files found in submodules. @@ -43,9 +43,9 @@ for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | LC_A JSON=$DATA_DIR/$NAME.json COLUMNS_FILE=$DATA_DIR/$NAME.columns - ([ -z "$PARQUET_READER" ] || [ ! -s "$PARQUET_READER" ]) && [ ! -s "$COLUMNS_FILE" ] && continue + { [ -z "$PARQUET_READER" ] || [ ! -s "$PARQUET_READER" ]; } && [ ! 
-s "$COLUMNS_FILE" ] && continue - echo === Try load data from "$NAME" + echo "=== Try load data from $NAME" # If you want change or add .parquet file - rm data_parquet/*.json data_parquet/*.columns [ -n "$PARQUET_READER" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$PARQUET_READER" --json "$DATA_DIR"/"$NAME" > "$JSON" @@ -64,7 +64,7 @@ CREATE TABLE parquet_load ($COLUMNS) ENGINE = Memory; EOF # Some files contain unsupported data structures, exception is ok. - cat "$DATA_DIR"/"$NAME" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" 2>&1 | sed 's/Exception/Ex---tion/' + ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" < "$DATA_DIR"/"$NAME" 2>&1 | sed 's/Exception/Ex---tion/' ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load ORDER BY tuple(*) LIMIT 100" ${CLICKHOUSE_CLIENT} --query="DROP TABLE parquet_load" diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index cd2ceabcf6d..55eacd0db44 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,5 @@ drop table if exists lc_00906; -create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b; +create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_00906 select '0123456789' from numbers(100000000); select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/00908_bloom_filter_index.reference b/tests/queries/0_stateless/00908_bloom_filter_index.reference index 641e4f035dd..c0cbd6c0335 100644 --- a/tests/queries/0_stateless/00908_bloom_filter_index.reference +++ b/tests/queries/0_stateless/00908_bloom_filter_index.reference @@ -40,3 +40,4 @@ 13 abc "rows_read": 3, 2 +::1 diff --git a/tests/queries/0_stateless/00908_bloom_filter_index.sh b/tests/queries/0_stateless/00908_bloom_filter_index.sh index 92b5634c1db..88fc7944236 100755 --- a/tests/queries/0_stateless/00908_bloom_filter_index.sh +++ b/tests/queries/0_stateless/00908_bloom_filter_index.sh @@ -150,3 +150,15 @@ CREATE TABLE bloom_filter_idx_na INDEX bf na TYPE bloom_filter(0.1) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY na" 2>&1 | grep -c 'DB::Exception: Unexpected type Array(Array(String)) of bloom filter index' + +# NGRAM BF with IPv6 +$CLICKHOUSE_CLIENT -n --query=" +CREATE TABLE bloom_filter_ipv6_idx +( + foo IPv6, + INDEX fooIndex foo TYPE ngrambf_v1(8,512,3,0) GRANULARITY 1 +) ENGINE = MergeTree() ORDER BY foo;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_ipv6_idx VALUES ('::1.2.3.4'),('::0'),('::1')" +$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_ipv6_idx WHERE foo IN ('::1')" +$CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_ipv6_idx" diff --git a/tests/queries/0_stateless/00921_datetime64_basic.sql b/tests/queries/0_stateless/00921_datetime64_basic.sql index 28205c72ef0..13abe3e64d0 100644 --- a/tests/queries/0_stateless/00921_datetime64_basic.sql +++ b/tests/queries/0_stateless/00921_datetime64_basic.sql @@ -3,17 +3,17 @@ DROP TABLE IF EXISTS A; SELECT CAST(1 as DateTime64('abc')); -- { serverError 43 } # Invalid scale parameter type SELECT CAST(1 as DateTime64(100)); -- { serverError 69 } # too big scale SELECT CAST(1 as DateTime64(-1)); -- { serverError 43 } # signed scale parameter type -SELECT CAST(1 as DateTime64(3, 'qqq')); -- { serverError 1000 } 
# invalid timezone +SELECT CAST(1 as DateTime64(3, 'qqq')); -- { serverError BAD_ARGUMENTS } # invalid timezone SELECT toDateTime64('2019-09-16 19:20:11.234', 'abc'); -- { serverError 43 } # invalid scale SELECT toDateTime64('2019-09-16 19:20:11.234', 100); -- { serverError 69 } # too big scale SELECT toDateTime64(CAST([['CLb5Ph ']], 'String'), uniqHLL12('2Gs1V', 752)); -- { serverError 44 } # non-const string and non-const scale -SELECT toDateTime64('2019-09-16 19:20:11.234', 3, 'qqq'); -- { serverError 1000 } # invalid timezone +SELECT toDateTime64('2019-09-16 19:20:11.234', 3, 'qqq'); -- { serverError BAD_ARGUMENTS } # invalid timezone SELECT ignore(now64(gccMurmurHash())); -- { serverError 43 } # Illegal argument type SELECT ignore(now64('abcd')); -- { serverError 43 } # Illegal argument type SELECT ignore(now64(number)) FROM system.numbers LIMIT 10; -- { serverError 43 } # Illegal argument type -SELECT ignore(now64(3, 'invalid timezone')); -- { serverError 1000 } +SELECT ignore(now64(3, 'invalid timezone')); -- { serverError BAD_ARGUMENTS } SELECT ignore(now64(3, 1111)); -- { serverError 44 } # invalid timezone parameter type WITH 'UTC' as timezone SELECT timezone, timeZoneOf(now64(3, timezone)) == timezone; diff --git a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh index 22d9e0690b3..d06037fb836 100755 --- a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh @@ -5,22 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" 
== 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl2" diff --git a/tests/queries/0_stateless/00933_ttl_simple.sql b/tests/queries/0_stateless/00933_ttl_simple.sql index 2bf686822d5..ad40e7c7e47 100644 --- a/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/tests/queries/0_stateless/00933_ttl_simple.sql @@ -1,3 +1,15 @@ +-- disable timezone randomization since otherwise TTL may fail at particular datetime, i.e.: +-- +-- SELECT +-- now(), +-- toDate(toTimeZone(now(), 'America/Mazatlan')), +-- today() +-- +-- ┌───────────────now()─┬─toDate(toTimeZone(now(), 'America/Mazatlan'))─┬────today()─┐ +-- │ 2023-07-24 06:24:06 │ 2023-07-23 │ 2023-07-24 │ +-- └─────────────────────┴───────────────────────────────────────────────┴────────────┘ +set session_timezone = ''; + drop table if exists ttl_00933_1; -- Column TTL works only with wide parts, because it's very expensive to apply it for compact parts diff --git a/tests/queries/0_stateless/00942_dataparts_500.sh b/tests/queries/0_stateless/00942_dataparts_500.sh index a6c3fcd4303..91c95816590 100755 --- a/tests/queries/0_stateless/00942_dataparts_500.sh +++ b/tests/queries/0_stateless/00942_dataparts_500.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check # Test fix for issue #5066 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index ccd9bbcf10e..926557e4ba6 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -65,6 +65,7 @@ echo 5 # run in background rm -f "$tmp_file2" >/dev/null 2>&1 bash -c "$CLICKHOUSE_CLIENT \ + --function_sleep_max_microseconds_per_block 60000000 \ --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \ --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > $tmp_file2" & diff --git a/tests/queries/0_stateless/00975_move_partition_merge_tree.sql b/tests/queries/0_stateless/00975_move_partition_merge_tree.sql index 2fc82b96403..c17f7c57de0 100644 --- a/tests/queries/0_stateless/00975_move_partition_merge_tree.sql +++ b/tests/queries/0_stateless/00975_move_partition_merge_tree.sql @@ -6,14 +6,14 @@ CREATE TABLE IF NOT EXISTS test_move_partition_src ( val UInt32 ) Engine = MergeTree() PARTITION BY pk - ORDER BY (pk, val); + ORDER BY (pk, val) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE IF NOT EXISTS test_move_partition_dest ( pk UInt8, val UInt32 ) Engine = MergeTree() PARTITION BY pk - ORDER BY (pk, val); + ORDER BY (pk, val) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test_move_partition_src SELECT number % 2, number FROM system.numbers LIMIT 10000000; diff --git a/tests/queries/0_stateless/00979_set_index_not.reference b/tests/queries/0_stateless/00979_set_index_not.reference index 455708dfe99..033fcb0467b 100644 --- a/tests/queries/0_stateless/00979_set_index_not.reference +++ b/tests/queries/0_stateless/00979_set_index_not.reference @@ -1,2 +1,4 @@ Jon alive Jon alive +Ramsey rip +Ramsey rip diff --git a/tests/queries/0_stateless/00979_set_index_not.sql 
b/tests/queries/0_stateless/00979_set_index_not.sql index 2ad27e337f9..13a0b4cbb09 100644 --- a/tests/queries/0_stateless/00979_set_index_not.sql +++ b/tests/queries/0_stateless/00979_set_index_not.sql @@ -11,5 +11,7 @@ insert into set_index_not values ('Jon','alive'),('Ramsey','rip'); select * from set_index_not where status!='rip'; select * from set_index_not where NOT (status ='rip'); +select * from set_index_not where NOT (status!='rip'); +select * from set_index_not where NOT (NOT (status ='rip')); DROP TABLE set_index_not; diff --git a/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql b/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql index 48d9dedc61c..7ee38867b53 100644 --- a/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql +++ b/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS topk; -CREATE TABLE topk (val1 String, val2 UInt32) ENGINE = MergeTree ORDER BY val1; +CREATE TABLE topk (val1 String, val2 UInt32) ENGINE = MergeTree ORDER BY val1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO topk WITH number % 7 = 0 AS frequent SELECT toString(frequent ? number % 10 : number), frequent ? 999999999 : number FROM numbers(4000000); diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 811681794a5..6025279e570 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database +# Tags: race, zookeeper, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -59,7 +59,8 @@ function thread6() CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, - cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50));"; + cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50)), + index_granularity = 8192, index_granularity_bytes = '10Mi';"; sleep 0.$RANDOM; done } diff --git a/tests/queries/0_stateless/00995_exception_while_insert.sh b/tests/queries/0_stateless/00995_exception_while_insert.sh index 927ac6a54e5..732dba6c6f1 100755 --- a/tests/queries/0_stateless/00995_exception_while_insert.sh +++ b/tests/queries/0_stateless/00995_exception_while_insert.sh @@ -7,8 +7,8 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x = 1500000)) ENGINE = Memory;" -seq 1 2000000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero." 
&& echo 'OK' || echo 'FAIL' ||: +seq 1 1500000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero." && echo 'OK' || echo 'FAIL' ||: $CLICKHOUSE_CLIENT --query="DROP TABLE check;" diff --git a/tests/queries/0_stateless/01012_show_tables_limit.sql b/tests/queries/0_stateless/01012_show_tables_limit.sql index 82cde6917d0..18a11f66d08 100644 --- a/tests/queries/0_stateless/01012_show_tables_limit.sql +++ b/tests/queries/0_stateless/01012_show_tables_limit.sql @@ -1,22 +1,15 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_show_limit; - -CREATE DATABASE test_show_limit; - -CREATE TABLE test_show_limit.test1 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test2 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test3 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test4 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test5 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test6 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test1 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test2 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test3 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test4 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test5 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test6 (test UInt8) ENGINE = TinyLog; SELECT '*** Should show 6: ***'; -SHOW TABLES FROM test_show_limit; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier}; SELECT '*** Should show 2: ***'; -SHOW TABLES FROM test_show_limit LIMIT 2; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIMIT 2; SELECT '*** Should show 4: ***'; -SHOW TABLES FROM test_show_limit LIMIT 2 * 2; - -DROP DATABASE test_show_limit; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIMIT 2 * 2; diff --git a/tests/queries/0_stateless/01015_attach_part.sql b/tests/queries/0_stateless/01015_attach_part.sql index a2f949d3499..9ff505efd8f 100644 --- a/tests/queries/0_stateless/01015_attach_part.sql +++ b/tests/queries/0_stateless/01015_attach_part.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel DROP TABLE IF EXISTS table_01; diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference index abc3218ce6c..5ee4e7592f6 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference @@ -4,4 +4,3 @@ 2 2 2 -2 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh index 6826ed677db..8a40129ad4f 100755 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh @@ -9,21 +9,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP DICTIONARY IF EXISTS dict1" -# Simple layout, but with two keys -$CLICKHOUSE_CLIENT -q " - CREATE DICTIONARY dict1 - ( - key1 UInt64, - key2 UInt64, - value String - ) - PRIMARY KEY key1, key2 - LAYOUT(HASHED()) - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) - LIFETIME(MIN 1 MAX 10) -" 2>&1 | grep -c 'Primary key for simple dictionary must contain exactly one 
element' - - # Simple layout, but with non existing key $CLICKHOUSE_CLIENT -q " CREATE DICTIONARY dict1 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference index f69302fb90f..17a77b91d45 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -15,12 +15,12 @@ dict1 Dictionary dict2 Dictionary table_for_dict MergeTree -database_for_dict dict1 ComplexKeyCache -database_for_dict dict2 Hashed +default dict1 ComplexKeyCache +default dict2 Hashed 6 6 6 6 6 -database_for_dict.dict3 6 +default.dict3 6 6 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 748c733bf9a..523b057d4e1 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -1,13 +1,9 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SET send_logs_level = 'fatal'; SET check_table_dependencies=0; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict ( key_column UInt64, second_column UInt8, @@ -17,9 +13,9 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100); -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, @@ -27,35 +23,35 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -SELECT second_column FROM database_for_dict.dict1 WHERE key_column = 11; -SELECT dictGetString('database_for_dict.dict1', 'third_column', toUInt64(12)); -SELECT third_column FROM database_for_dict.dict1 WHERE key_column = 12; -SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', toUInt64(14)); -SELECT fourth_column FROM database_for_dict.dict1 WHERE key_column = 14; +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); +SELECT second_column FROM {CLICKHOUSE_DATABASE:Identifier}.dict1 WHERE key_column = 11; +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict1', 'third_column', toUInt64(12)); +SELECT third_column FROM {CLICKHOUSE_DATABASE:Identifier}.dict1 WHERE key_column = 12; +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'fourth_column', toUInt64(14)); +SELECT fourth_column FROM {CLICKHOUSE_DATABASE:Identifier}.dict1 WHERE key_column = 14; -SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); +SELECT count(distinct(dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 
'second_column', toUInt64(number)))) from numbers(100); -DETACH DICTIONARY database_for_dict.dict1; +DETACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -- {serverError 36} -ATTACH DICTIONARY database_for_dict.dict1; +ATTACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -DROP DICTIONARY database_for_dict.dict1; +DROP DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -- {serverError 36} -- SOURCE(CLICKHOUSE(...)) uses default params if not specified -DROP DICTIONARY IF EXISTS database_for_dict.dict1; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict1; -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, @@ -63,17 +59,17 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); +SELECT count(distinct(dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(number)))) from numbers(100); -DROP DICTIONARY database_for_dict.dict1; +DROP DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, @@ -81,69 +77,68 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column, third_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1)); -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); -SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', tuple(toUInt64(14), '196')); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', tuple(toUInt64(11), '121')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'fourth_column', tuple(toUInt64(14), '196')); -DETACH DICTIONARY database_for_dict.dict1; +DETACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); -- {serverError 36} +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', tuple(toUInt64(11), '121')); -- {serverError 36} -ATTACH DICTIONARY database_for_dict.dict1; +ATTACH DICTIONARY 
{CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', tuple(toUInt64(11), '121')); -CREATE DICTIONARY database_for_dict.dict2 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict2 ( key_column UInt64 DEFAULT 0, some_column String EXPRESSION toString(fourth_column), fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(HASHED()); -SELECT dictGetString('database_for_dict.dict2', 'some_column', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict2', 'some_column', toUInt64(12)); -SELECT name, engine FROM system.tables WHERE database = 'database_for_dict' ORDER BY name; +-- NOTE: database = currentDatabase() is not mandatory +SELECT name, engine FROM system.tables WHERE database = {CLICKHOUSE_DATABASE:String} ORDER BY name; -SELECT database, name, type FROM system.dictionaries WHERE database = 'database_for_dict' ORDER BY name; +SELECT database, name, type FROM system.dictionaries WHERE database = {CLICKHOUSE_DATABASE:String} ORDER BY name; -- check dictionary will not update -CREATE DICTIONARY database_for_dict.dict3 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict3 ( key_column UInt64 DEFAULT 0, some_column String EXPRESSION toString(fourth_column), fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(0) LAYOUT(HASHED()); -SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); -- dictGet with table name -USE database_for_dict; +USE {CLICKHOUSE_DATABASE:Identifier}; SELECT dictGetString(dict3, 'some_column', toUInt64(12)); -SELECT dictGetString(database_for_dict.dict3, 'some_column', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:Identifier}.dict3, 'some_column', toUInt64(12)); SELECT dictGetString(default.dict3, 'some_column', toUInt64(12)); -- {serverError 36} SELECT dictGet(dict3, 'some_column', toUInt64(12)); -SELECT dictGet(database_for_dict.dict3, 'some_column', toUInt64(12)); +SELECT dictGet({CLICKHOUSE_DATABASE:Identifier}.dict3, 'some_column', toUInt64(12)); SELECT dictGet(default.dict3, 'some_column', toUInt64(12)); -- {serverError 36} USE default; -- alias should be handled correctly -SELECT 'database_for_dict.dict3' as n, dictGet(n, 'some_column', toUInt64(12)); +SELECT {CLICKHOUSE_DATABASE:String} || '.dict3' as n, dictGet(n, 'some_column', toUInt64(12)); -DROP TABLE database_for_dict.table_for_dict; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; SYSTEM RELOAD DICTIONARIES; -- {serverError 60} -SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12)); - -DROP DATABASE IF EXISTS database_for_dict; +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql index 
1877c289d3f..51e1eb2e12f 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql @@ -1,14 +1,10 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SET send_logs_level = 'fatal'; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - SELECT '***date dict***'; -CREATE TABLE database_for_dict.date_table +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.date_table ( CountryID UInt64, StartDate Date, @@ -18,11 +14,11 @@ CREATE TABLE database_for_dict.date_table ENGINE = MergeTree() ORDER BY CountryID; -INSERT INTO database_for_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); -INSERT INTO database_for_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); -INSERT INTO database_for_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( CountryID UInt64, StartDate Date, @@ -30,19 +26,19 @@ CREATE DICTIONARY database_for_dict.dict1 Tax Float64 ) PRIMARY KEY CountryID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB currentDatabase())) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN StartDate MAX EndDate); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('2019-05-15')); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('2019-05-29')); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-29')); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-31')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(1), toDate('2019-05-15')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(1), toDate('2019-05-29')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(2), toDate('2019-05-29')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(2), toDate('2019-05-31')); SELECT '***datetime dict***'; -CREATE TABLE database_for_dict.datetime_table +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.datetime_table ( CountryID UInt64, StartDate DateTime, @@ -52,11 +48,11 @@ CREATE TABLE database_for_dict.datetime_table ENGINE = MergeTree() ORDER BY CountryID; -INSERT INTO database_for_dict.datetime_table VALUES(1, toDateTime('2019-05-05 00:00:00'), toDateTime('2019-05-20 00:00:00'), 0.33); -INSERT INTO database_for_dict.datetime_table VALUES(1, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.42); -INSERT INTO database_for_dict.datetime_table VALUES(2, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.46); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.datetime_table VALUES(1, toDateTime('2019-05-05 00:00:00'), toDateTime('2019-05-20 00:00:00'), 0.33); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.datetime_table VALUES(1, toDateTime('2019-05-21 
00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.42); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.datetime_table VALUES(2, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.46); -CREATE DICTIONARY database_for_dict.dict2 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict2 ( CountryID UInt64, StartDate DateTime, @@ -64,19 +60,19 @@ CREATE DICTIONARY database_for_dict.dict2 Tax Float64 ) PRIMARY KEY CountryID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'datetime_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'datetime_table' DB currentDatabase())) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN StartDate MAX EndDate); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-15 00:00:00')); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-29 00:00:00')); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00')); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-15 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-29 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00')); SELECT '***hierarchy dict***'; -CREATE TABLE database_for_dict.table_with_hierarchy +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_with_hierarchy ( RegionID UInt64, ParentRegionID UInt64, @@ -85,24 +81,24 @@ CREATE TABLE database_for_dict.table_with_hierarchy ENGINE = MergeTree() ORDER BY RegionID; -INSERT INTO database_for_dict.table_with_hierarchy VALUES (3, 2, 'Hamovniki'), (2, 1, 'Moscow'), (1, 10000, 'Russia') (7, 10000, 'Ulan-Ude'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_with_hierarchy VALUES (3, 2, 'Hamovniki'), (2, 1, 'Moscow'), (1, 10000, 'Russia') (7, 10000, 'Ulan-Ude'); -CREATE DICTIONARY database_for_dict.dictionary_with_hierarchy +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dictionary_with_hierarchy ( RegionID UInt64, ParentRegionID UInt64 HIERARCHICAL, RegionName String ) PRIMARY KEY RegionID -SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db 'database_for_dict' table 'table_with_hierarchy')) +SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db currentDatabase() table 'table_with_hierarchy')) LAYOUT(HASHED()) LIFETIME(MIN 1 MAX 1000); -SELECT dictGetString('database_for_dict.dictionary_with_hierarchy', 'RegionName', toUInt64(2)); -SELECT dictGetHierarchy('database_for_dict.dictionary_with_hierarchy', toUInt64(3)); -SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(3), toUInt64(2)); -SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(7), toUInt64(10000)); -SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(1), toUInt64(5)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', 'RegionName', toUInt64(2)); +SELECT dictGetHierarchy({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(3)); +SELECT dictIsIn({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(3), 
toUInt64(2)); +SELECT dictIsIn({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(7), toUInt64(10000)); +SELECT dictIsIn({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(1), toUInt64(5)); -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01018_ip_dictionary_long.sql b/tests/queries/0_stateless/01018_ip_dictionary_long.sql index 647c36429cc..bb7f120163c 100644 --- a/tests/queries/0_stateless/01018_ip_dictionary_long.sql +++ b/tests/queries/0_stateless/01018_ip_dictionary_long.sql @@ -1,13 +1,9 @@ --- Tags: long, no-parallel +-- Tags: long SET send_logs_level = 'fatal'; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - SELECT '***ipv4 trie dict***'; -CREATE TABLE database_for_dict.table_ipv4_trie +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie ( prefix String, asn UInt32, @@ -16,648 +12,648 @@ CREATE TABLE database_for_dict.table_ipv4_trie engine = TinyLog; -- numbers reordered to test sorting criteria too -INSERT INTO database_for_dict.table_ipv4_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie SELECT '255.255.255.255/' || toString((number + 1) * 13 % 33) AS prefix, toUInt32((number + 1) * 13 % 33) AS asn, 'NA' as cca2 FROM system.numbers LIMIT 33; -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.2', 1272, 'RU'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/8', 1270, 'RU'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.2', 1272, 'RU'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/8', 1270, 'RU'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -- non-unique entries will be squashed into one -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('101.79.55.22', 11212, 'UK'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('101.79.55.22', 11212, 'UK'); -CREATE DICTIONARY database_for_dict.dict_ipv4_trie +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie ( prefix String, asn UInt32, cca2 String ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie')) LAYOUT(IP_TRIE()) LIFETIME(MIN 10 MAX 100); -- fuzzer -SELECT '127.0.0.0/24' = dictGetString('database_for_dict.dict_ipv4_trie', 'prefixprefixprefixprefix', tuple(IPv4StringToNumOrDefault('127.0.0.0127.0.0.0'))); -- { serverError 36 } +SELECT '127.0.0.0/24' = dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefixprefixprefixprefix', tuple(IPv4StringToNumOrDefault('127.0.0.0127.0.0.0'))); -- { serverError 36 } -SELECT 0 == 
dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT 1 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('128.0.0.0'))); -SELECT 2 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('192.0.0.0'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('224.0.0.0'))); -SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('240.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('248.0.0.0'))); -SELECT 6 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('252.0.0.0'))); -SELECT 7 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('254.0.0.0'))); -SELECT 8 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.0.0.0'))); -SELECT 9 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.128.0.0'))); -SELECT 10 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.192.0.0'))); -SELECT 11 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.224.0.0'))); -SELECT 12 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.240.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.248.0.0'))); -SELECT 14 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.252.0.0'))); -SELECT 15 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.254.0.0'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.0.0'))); -SELECT 17 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.128.0'))); -SELECT 18 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.192.0'))); -SELECT 19 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.224.0'))); -SELECT 20 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.240.0'))); -SELECT 21 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.248.0'))); -SELECT 22 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.252.0'))); -SELECT 23 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.254.0'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.0'))); -SELECT 25 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.128'))); -SELECT 26 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.192'))); -SELECT 27 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.224'))); -SELECT 28 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.240'))); -SELECT 29 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.248'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.252'))); -SELECT 31 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.254'))); 
-SELECT 32 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.255'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT 1 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('128.0.0.0'))); +SELECT 2 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('192.0.0.0'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('224.0.0.0'))); +SELECT 4 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('240.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('248.0.0.0'))); +SELECT 6 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('252.0.0.0'))); +SELECT 7 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('254.0.0.0'))); +SELECT 8 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.0.0.0'))); +SELECT 9 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.128.0.0'))); +SELECT 10 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.192.0.0'))); +SELECT 11 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.224.0.0'))); +SELECT 12 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.240.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.248.0.0'))); +SELECT 14 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.252.0.0'))); +SELECT 15 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.254.0.0'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.0.0'))); +SELECT 17 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.128.0'))); +SELECT 18 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.192.0'))); +SELECT 19 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.224.0'))); +SELECT 20 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.240.0'))); +SELECT 21 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.248.0'))); +SELECT 22 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.252.0'))); +SELECT 23 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.254.0'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.0'))); +SELECT 25 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.128'))); +SELECT 26 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.192'))); +SELECT 27 == 
dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.224'))); +SELECT 28 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.240'))); +SELECT 29 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.248'))); +SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.252'))); +SELECT 31 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.254'))); +SELECT 32 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.255'))); -SELECT 'RU' == dictGetString('database_for_dict.dict_ipv4_trie', 'cca2', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 'RU' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'cca2', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1272 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.2'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.3'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.255'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1272 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.2'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.3'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.255'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.2'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.3'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.255'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.2'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.3'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.255'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('101.79.55.22'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', 
tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('101.79.55.22'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT 11211 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('202.79.32.2'))); +SELECT 11211 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('202.79.32.2'))); -- check that dictionary works with aliased types `IPv4` and `IPv6` -SELECT 11211 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(toIPv4('202.79.32.2'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(toIPv6('::ffff:101.79.55.22'))); +SELECT 11211 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(toIPv4('202.79.32.2'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(toIPv6('::ffff:101.79.55.22'))); -CREATE TABLE database_for_dict.table_from_ipv4_trie_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict ( prefix String, asn UInt32, cca2 String -) ENGINE = Dictionary(database_for_dict.dict_ipv4_trie); +) ENGINE = Dictionary({CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie); -SELECT 1272 == asn AND 'RU' == cca2 FROM database_for_dict.table_from_ipv4_trie_dict +SELECT 1272 == asn AND 'RU' == cca2 FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict WHERE prefix == '127.0.0.2/32'; -SELECT 37 == COUNT(*) FROM database_for_dict.table_from_ipv4_trie_dict; -SELECT 37 == COUNT(DISTINCT prefix) FROM database_for_dict.table_from_ipv4_trie_dict; +SELECT 37 == COUNT(*) FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; +SELECT 37 == COUNT(DISTINCT prefix) FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; -DROP TABLE IF EXISTS database_for_dict.table_from_ipv4_trie_dict; -DROP DICTIONARY IF EXISTS database_for_dict.dict_ipv4_trie; -DROP TABLE IF EXISTS database_for_dict.table_ipv4_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie; SELECT '***ipv4 trie dict mask***'; -CREATE TABLE database_for_dict.table_ipv4_trie +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie ( prefix String, val UInt32 ) engine = TinyLog; -INSERT INTO database_for_dict.table_ipv4_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie SELECT '255.255.255.255/' || toString(number) AS prefix, toUInt32(number) AS val FROM VALUES ('number UInt32', 5, 13, 24, 30); -CREATE DICTIONARY database_for_dict.dict_ipv4_trie +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie ( prefix String, val UInt32 ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie')) LAYOUT(IP_TRIE()) LIFETIME(MIN 10 MAX 100); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', 
tuple(IPv4StringToNum('128.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('192.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('224.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('254.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.128.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.192.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.224.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.254.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.128.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.192.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.224.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.240.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.248.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.252.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.192'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.224'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.240'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.254'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); +SELECT 0 == 
dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('192.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('224.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('254.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.128.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.192.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.224.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.254.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.128.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.192.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.224.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.240.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.248.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.252.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.192'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.224'))); +SELECT 24 == 
dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.240')));
+SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.248')));
+SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.252')));
+SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.254')));
+SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.255')));
-DROP DICTIONARY IF EXISTS database_for_dict.dict_ipv4_trie;
-DROP TABLE IF EXISTS database_for_dict.table_from_ipv4_trie_dict;
-DROP TABLE IF EXISTS database_for_dict.table_ipv4_trie;
+DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie;
+DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict;
+DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie;
 SELECT '***ipv4 trie dict pt2***';
-CREATE TABLE database_for_dict.table_ipv4_trie ( prefix String, val UInt32 ) engine = TinyLog;
+CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie ( prefix String, val UInt32 ) engine = TinyLog;
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/8', 1);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/16', 2);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/24', 3);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.1/32', 4);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.127.0/32', 5);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.128.1/32', 6);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.255.0/32', 7);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.255.1/32', 8);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.255.255/32', 9);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.1.0.0/16', 10);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.1.1.0', 11);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.1.255.0/24', 12);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.254.0.0/15', 13);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.254.0.127', 14);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.0.0/16', 15);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.0/24', 16);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.1/32', 17);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.10/32', 18);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.128/25', 19);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.255.128/32', 20);
-INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.255.255/32', 21);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/8', 1);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/16', 2);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/24', 3);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.1/32', 4);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.127.0/32', 5);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.128.1/32', 6);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.255.0/32', 7);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.255.1/32', 8);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.255.255/32', 9);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.1.0.0/16', 10);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.1.1.0', 11);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.1.255.0/24', 12);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.254.0.0/15', 13);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.254.0.127', 14);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.0.0/16', 15);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.0/24', 16);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.1/32', 17);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.10/32', 18);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.128/25', 19);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.255.128/32', 20);
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.255.255/32', 21);
-CREATE DICTIONARY database_for_dict.dict_ipv4_trie ( prefix String, val UInt32 )
+CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie ( prefix String, val UInt32 )
 PRIMARY KEY prefix
-SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie'))
+SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie'))
 LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1)) LIFETIME(MIN 10 MAX 100);
-SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.0')));
-SELECT '127.0.0.1/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.1')));
-SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.127')));
-SELECT '127.0.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.255.127')));
-SELECT '127.255.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.127.127')));
-SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.9')));
-SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.127')));
-SELECT '127.255.128.10/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.10')));
-SELECT '127.255.128.128/25' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.255')));
-SELECT '127.255.255.128/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.255.128')));
+SELECT '127.0.0.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.0')));
+SELECT '127.0.0.1/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.1')));
+SELECT '127.0.0.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.127')));
+SELECT '127.0.0.0/16' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.255.127'))); +SELECT '127.255.0.0/16' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.127.127'))); +SELECT '127.255.128.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.9'))); +SELECT '127.255.128.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.127'))); +SELECT '127.255.128.10/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.10'))); +SELECT '127.255.128.128/25' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.255'))); +SELECT '127.255.255.128/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.255.128'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127'))); -SELECT 2 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.255.127'))); -SELECT 15 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.127.127'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.9'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.127'))); -SELECT 18 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.10'))); -SELECT 19 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.255'))); -SELECT 20 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.255.128'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 4 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127'))); +SELECT 2 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.255.127'))); +SELECT 15 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.127.127'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.9'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.127'))); +SELECT 18 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.10'))); +SELECT 19 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.255'))); +SELECT 20 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.255.128'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:0'))); 
-SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:1'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:7f'))); -SELECT 2 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:ff7f'))); -SELECT 15 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:7f7f'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:8009'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:807f'))); -SELECT 18 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:800a'))); -SELECT 19 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:80ff'))); -SELECT 20 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:ff80'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:0'))); +SELECT 4 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:1'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:7f'))); +SELECT 2 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:ff7f'))); +SELECT 15 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:7f7f'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:8009'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:807f'))); +SELECT 18 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:800a'))); +SELECT 19 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:80ff'))); +SELECT 20 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:ff80'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.255.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.127.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.9'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.10'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.255'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.255.128'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1 
== dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.127')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.255.127')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.127.127')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.9')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.127')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.10')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.255')));
+SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.255.128')));
-SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.127')));
-SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.0')));
-SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('255.127.127.0')));
-SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('255.0.0.0')));
-SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('0.0.0.0')));
-SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('1.1.1.1')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.127')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.0')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('255.127.127.0')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('255.0.0.0')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('0.0.0.0')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('1.1.1.1')));
 SELECT '***ipv6 trie dict***';
-CREATE TABLE database_for_dict.table_ip_trie
+CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie
 ( prefix String, val String ) engine = TinyLog;
-INSERT INTO database_for_dict.table_ip_trie VALUES ('101.79.55.22', 'JA'), ('127.0.0.1', 'RU'), ('2620:0:870::/48', 'US'), ('2a02:6b8:1::/48', 'UK'), ('2001:db8::/32', 'ZZ');
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie VALUES ('101.79.55.22', 'JA'), ('127.0.0.1', 'RU'), ('2620:0:870::/48', 'US'), ('2a02:6b8:1::/48', 'UK'), ('2001:db8::/32', 'ZZ');
-INSERT INTO database_for_dict.table_ip_trie
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie
 SELECT 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/' || toString((number + 1) * 13 % 129) AS prefix, toString((number + 1) * 13 % 129) AS val FROM system.numbers LIMIT 129;
-CREATE DICTIONARY database_for_dict.dict_ip_trie
+CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie
 ( prefix String, val String ) PRIMARY KEY prefix
-SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie'))
+SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ip_trie'))
 LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1)) LIFETIME(MIN 10 MAX 100);
-SELECT 'US' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2620:0:870::')));
-SELECT 'UK' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2a02:6b8:1::'))); -SELECT 'ZZ' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8::'))); -SELECT 'ZZ' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8:ffff::'))); +SELECT 'US' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2620:0:870::'))); +SELECT 'UK' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2a02:6b8:1::'))); +SELECT 'ZZ' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8::'))); +SELECT 'ZZ' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8:ffff::'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff::'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:ffff::'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:1::'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff::'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:ffff::'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:1::'))); -SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::'))); +SELECT '0' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1'))); -SELECT '2620:0:870::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2620:0:870::'))); -SELECT '2a02:6b8:1::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2a02:6b8:1::1'))); -SELECT '2001:db8::/32' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2001:db8::1'))); -SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); 
+SELECT '2620:0:870::/48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2620:0:870::'))); +SELECT '2a02:6b8:1::/48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2a02:6b8:1::1'))); +SELECT '2001:db8::/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2001:db8::1'))); +SELECT '::ffff:101.79.55.22/128' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT '::ffff:101.79.55.22/128' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); -SELECT '1' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); -SELECT '2' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); -SELECT '3' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); -SELECT '4' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); -SELECT '6' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); -SELECT '7' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); -SELECT '8' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); -SELECT '9' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); -SELECT '10' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); -SELECT '11' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); -SELECT '12' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); -SELECT '14' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); -SELECT '15' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); -SELECT '16' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); -SELECT '17' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); -SELECT '18' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '19' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '20' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '21' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '22' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '18' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '19' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '20' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '21' == 
dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '22' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '23' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); -SELECT '25' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); -SELECT '26' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); -SELECT '27' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); -SELECT '28' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); -SELECT '29' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); -SELECT '31' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); -SELECT '32' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); -SELECT '33' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); -SELECT '34' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); -SELECT '35' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); -SELECT '36' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); -SELECT '37' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); -SELECT '38' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); -SELECT '39' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); -SELECT '40' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); -SELECT '41' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); -SELECT '42' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); -SELECT '43' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); -SELECT '44' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); -SELECT '45' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); -SELECT '46' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); -SELECT '47' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); -SELECT '48' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); -SELECT '50' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); -SELECT '51' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); -SELECT '52' == 
dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); -SELECT '53' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); -SELECT '54' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); -SELECT '55' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); -SELECT '56' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); -SELECT '57' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); -SELECT '58' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); -SELECT '59' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); -SELECT '60' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); -SELECT '61' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); -SELECT '62' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); -SELECT '63' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); -SELECT '64' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); -SELECT '65' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); -SELECT '66' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); -SELECT '67' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); -SELECT '68' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); -SELECT '69' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); -SELECT '70' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); -SELECT '71' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); -SELECT '72' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); -SELECT '73' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); -SELECT '74' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '75' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '76' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); -SELECT '77' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); -SELECT '78' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); -SELECT '79' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); -SELECT '80' == dictGetString('database_for_dict.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); -SELECT '81' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); -SELECT '82' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); -SELECT '83' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); -SELECT '84' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); -SELECT '85' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); -SELECT '86' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); -SELECT '87' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); -SELECT '88' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); -SELECT '89' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); -SELECT '90' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '91' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '92' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); -SELECT '93' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); -SELECT '94' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); -SELECT '95' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); -SELECT '96' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); -SELECT '97' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); -SELECT '98' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); -SELECT '100' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); -SELECT '101' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); -SELECT '102' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); -SELECT '103' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); -SELECT '104' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); -SELECT '105' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); -SELECT '106' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); -SELECT '107' == 
dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); -SELECT '108' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); -SELECT '109' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); -SELECT '110' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); -SELECT '111' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); -SELECT '112' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); -SELECT '113' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); -SELECT '114' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); -SELECT '115' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); -SELECT '116' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); -SELECT '117' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); -SELECT '118' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); -SELECT '119' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); -SELECT '120' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); -SELECT '121' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); -SELECT '122' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); -SELECT '123' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); -SELECT '124' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); -SELECT '125' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); -SELECT '126' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); -SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); -SELECT '128' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); +SELECT '0' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); +SELECT '1' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); +SELECT '2' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); +SELECT '3' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); +SELECT '4' == 
dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); +SELECT '6' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); +SELECT '7' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); +SELECT '8' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); +SELECT '9' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); +SELECT '10' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); +SELECT '11' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); +SELECT '12' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); +SELECT '14' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); +SELECT '15' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); +SELECT '16' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); +SELECT '17' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); +SELECT '18' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '19' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '20' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '21' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '22' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '18' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '19' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '20' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '21' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '22' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '23' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); +SELECT '25' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); +SELECT '26' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); +SELECT '27' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('ffff:ffe0::'))); +SELECT '28' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); +SELECT '29' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); +SELECT '31' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); +SELECT '32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); +SELECT '33' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); +SELECT '34' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); +SELECT '35' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); +SELECT '36' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); +SELECT '37' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); +SELECT '38' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); +SELECT '39' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); +SELECT '40' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); +SELECT '41' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); +SELECT '42' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); +SELECT '43' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); +SELECT '44' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); +SELECT '45' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); +SELECT '46' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); +SELECT '47' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); +SELECT '48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); +SELECT '50' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); +SELECT '51' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); +SELECT '52' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); +SELECT '53' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); +SELECT '54' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); +SELECT '55' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); +SELECT '56' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); +SELECT '57' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); +SELECT '58' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); +SELECT '59' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); +SELECT '60' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); +SELECT '61' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); +SELECT '62' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); +SELECT '63' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); +SELECT '64' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); +SELECT '65' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); +SELECT '66' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); +SELECT '67' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); +SELECT '68' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); +SELECT '69' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); +SELECT '70' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); +SELECT '71' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); +SELECT '72' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); +SELECT '73' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); +SELECT '74' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '75' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '76' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); +SELECT '77' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); +SELECT '78' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); +SELECT '79' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); +SELECT '80' == 
dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); +SELECT '81' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); +SELECT '82' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); +SELECT '83' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); +SELECT '84' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); +SELECT '85' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); +SELECT '86' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); +SELECT '87' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); +SELECT '88' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); +SELECT '89' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); +SELECT '90' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '91' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '92' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); +SELECT '93' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); +SELECT '94' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); +SELECT '95' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); +SELECT '96' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); +SELECT '97' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); +SELECT '98' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); +SELECT '100' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); +SELECT '101' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); +SELECT '102' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); +SELECT '103' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); +SELECT '104' == 
dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); +SELECT '105' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); +SELECT '106' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); +SELECT '107' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); +SELECT '108' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); +SELECT '109' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); +SELECT '110' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); +SELECT '111' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); +SELECT '112' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); +SELECT '113' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); +SELECT '114' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); +SELECT '115' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); +SELECT '116' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); +SELECT '117' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); +SELECT '118' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); +SELECT '119' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); +SELECT '120' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); +SELECT '121' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); +SELECT '122' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); +SELECT '123' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); +SELECT '124' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); +SELECT '125' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); +SELECT '126' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); +SELECT '127' == 
dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe')));
+SELECT '128' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff')));
-CREATE TABLE database_for_dict.table_from_ip_trie_dict
+CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict
 ( prefix String, val String
-) ENGINE = Dictionary(database_for_dict.dict_ip_trie);
+) ENGINE = Dictionary({CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie);
-SELECT MIN(val == 'US') FROM database_for_dict.table_from_ip_trie_dict
+SELECT MIN(val == 'US') FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict
 WHERE prefix == '2620:0:870::/48';
-SELECT 134 == COUNT(*) FROM database_for_dict.table_from_ip_trie_dict;
+SELECT 134 == COUNT(*) FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict;
-DROP TABLE IF EXISTS database_for_dict.table_from_ip_trie_dict;
-DROP DICTIONARY IF EXISTS database_for_dict.dict_ip_trie;
-DROP TABLE IF EXISTS database_for_dict.table_ip_trie;
+DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict;
+DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie;
+DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie;
 SELECT '***ipv6 trie dict mask***';
-CREATE TABLE database_for_dict.table_ip_trie
+CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie
 ( prefix String, val String ) engine = TinyLog;
-INSERT INTO database_for_dict.table_ip_trie
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie
 SELECT 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/' || toString(number) AS prefix, toString(number) AS val FROM VALUES ('number UInt32', 5, 13, 24, 48, 49, 99, 127);
-INSERT INTO database_for_dict.table_ip_trie VALUES ('101.79.55.22', 'JA');
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie VALUES ('101.79.55.22', 'JA');
-INSERT INTO database_for_dict.table_ip_trie
+INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie
 SELECT '255.255.255.255/' || toString(number) AS prefix, toString(number) AS val FROM VALUES ('number UInt32', 5, 13, 24, 30);
-CREATE DICTIONARY database_for_dict.dict_ip_trie
+CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie
 ( prefix String, val String ) PRIMARY KEY prefix
-SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie'))
+SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ip_trie'))
 LAYOUT(IP_TRIE()) LIFETIME(MIN 10 MAX 100);
-SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:1:1')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('::ffff:1:1')));
-SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::')));
-SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716::')));
-SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716:ffff::')));
+SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('654f:3716::')));
+SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('654f:3716:ffff::')));
-SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val',
tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); -SELECT '48' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); -SELECT '49' == 
dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); -SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); -SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', 
tuple(IPv6StringToNum('f000::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} 
|| '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); +SELECT '48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || 
'.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); 
+SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); +SELECT 
'99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); +SELECT '127' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); +SELECT '127' == 
dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.254'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', 
tuple(IPv4StringToNum('255.255.255.254'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01021_only_tuple_columns.sql b/tests/queries/0_stateless/01021_only_tuple_columns.sql index 02db21bc0b2..d8d146f59fd 100644 --- a/tests/queries/0_stateless/01021_only_tuple_columns.sql +++ b/tests/queries/0_stateless/01021_only_tuple_columns.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel CREATE TABLE test ( diff --git a/tests/queries/0_stateless/01033_dictionaries_lifetime.sql b/tests/queries/0_stateless/01033_dictionaries_lifetime.sql index 67e1adf5574..e74ac8bde30 100644 --- a/tests/queries/0_stateless/01033_dictionaries_lifetime.sql +++ b/tests/queries/0_stateless/01033_dictionaries_lifetime.sql @@ -1,12 +1,7 @@ --- Tags: no-parallel SET send_logs_level = 'fatal'; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict ( key_column UInt64, second_column UInt8, @@ -15,34 +10,34 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, 'Hello world'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_for_dict VALUES (1, 100, 'Hello world'); -DROP DATABASE IF EXISTS ordinary_db; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -CREATE DATABASE ordinary_db; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; -CREATE DICTIONARY ordinary_db.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE_1:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); SELECT 'INITIALIZING DICTIONARY'; -SELECT dictGetUInt8('ordinary_db.dict1', 'second_column', toUInt64(100500)); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE_1:String}||'.dict1', 'second_column', toUInt64(100500)); -SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE database='ordinary_db' AND name = 'dict1'; +SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE database={CLICKHOUSE_DATABASE_1:String} AND name = 'dict1'; -DROP DICTIONARY IF EXISTS ordinary_db.dict1; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}.dict1; -DROP DATABASE IF EXISTS ordinary_db; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -DROP TABLE IF EXISTS database_for_dict.table_for_dict; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh index e0a84323dbd..39c5742e7a7 100755 --- a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh @@ -7,23 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst;" diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index a81fef645db..64bafd17b2d 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -3,7 +3,7 @@ set allow_experimental_parallel_reading_from_replicas = 0; drop table if exists sample_final; -create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID); +create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into sample_final select number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), number % 3 = 1 ? -1 : 1 from numbers(1000000); select 'count'; diff --git a/tests/queries/0_stateless/01035_avg.sql b/tests/queries/0_stateless/01035_avg.sql index d683ada0aec..a3cb35a80ec 100644 --- a/tests/queries/0_stateless/01035_avg.sql +++ b/tests/queries/0_stateless/01035_avg.sql @@ -22,7 +22,7 @@ CREATE TABLE IF NOT EXISTS test_01035_avg ( d64 Decimal64(18) DEFAULT toDecimal64(u64 / 1000000, 8), d128 Decimal128(20) DEFAULT toDecimal128(i128 / 100000, 20), d256 Decimal256(40) DEFAULT toDecimal256(i256 / 100000, 40) -) ENGINE = MergeTree() ORDER BY i64; +) ENGINE = MergeTree() ORDER BY i64 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; SELECT avg(i8), avg(i16), avg(i32), avg(i64), avg(i128), avg(i256), avg(u8), avg(u16), avg(u32), avg(u64), avg(u128), avg(u256), diff --git a/tests/queries/0_stateless/01035_avg_weighted_long.sh b/tests/queries/0_stateless/01035_avg_weighted_long.sh index 138aa03fbb3..8838b07a3d7 100755 --- a/tests/queries/0_stateless/01035_avg_weighted_long.sh +++ b/tests/queries/0_stateless/01035_avg_weighted_long.sh @@ -11,36 +11,36 @@ ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, y) FROM (select toDecimal256 ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, y) FROM (select toDecimal32(1, 0) x, toDecimal256(1, 1) y);" types=("Int8" "Int16" "Int32" "Int64" "UInt8" "UInt16" "UInt32" "UInt64" "Float32" "Float64") - -for left in "${types[@]}" -do - for right in "${types[@]}" - do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (4, 1), (1, 0), (10, 2))" - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (0, 0), (1, 0))" - done -done - exttypes=("Int128" "Int256" "UInt256") - -for left in "${exttypes[@]}" -do - for right in "${exttypes[@]}" - do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(to${left}(1), to${right}(2))" - done -done - # Decimal types dtypes=("32" "64" "128" "256") -for left in "${dtypes[@]}" -do - for right in "${dtypes[@]}" +( + for left in 
"${types[@]}" do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(toDecimal${left}(2, 4), toDecimal${right}(1, 4))" + for right in "${types[@]}" + do + echo "SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (4, 1), (1, 0), (10, 2));" + echo "SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (0, 0), (1, 0));" + done done -done + + for left in "${exttypes[@]}" + do + for right in "${exttypes[@]}" + do + echo "SELECT avgWeighted(to${left}(1), to${right}(2));" + done + done + + for left in "${dtypes[@]}" + do + for right in "${dtypes[@]}" + do + echo "SELECT avgWeighted(toDecimal${left}(2, 4), toDecimal${right}(1, 4));" + done + done +) | clickhouse-client -nm echo "$(${CLICKHOUSE_CLIENT} --server_logs_file=/dev/null --query="SELECT avgWeighted(['string'], toFloat64(0))" 2>&1)" \ | grep -c 'Code: 43. DB::Exception: .* DB::Exception:.* Types .* are non-conforming as arguments for aggregate function avgWeighted' diff --git a/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql b/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql index ad364237544..ab0b5a243ba 100644 --- a/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql +++ b/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql @@ -1,10 +1,5 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS dictdb; - -CREATE DATABASE dictdb; - -CREATE TABLE dictdb.table_for_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict ( key_column UInt64, value Float64 @@ -12,33 +7,32 @@ CREATE TABLE dictdb.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO dictdb.table_for_dict VALUES (1, 1.1); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_for_dict VALUES (1, 1.1); -CREATE DICTIONARY IF NOT EXISTS dictdb.dict_exists +CREATE DICTIONARY IF NOT EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_exists ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'dictdb')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(1) LAYOUT(FLAT()); -SELECT dictGetFloat64('dictdb.dict_exists', 'value', toUInt64(1)); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict_exists', 'value', toUInt64(1)); -CREATE DICTIONARY IF NOT EXISTS dictdb.dict_exists +CREATE DICTIONARY IF NOT EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_exists ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'dictdb')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(1) LAYOUT(FLAT()); -SELECT dictGetFloat64('dictdb.dict_exists', 'value', toUInt64(1)); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict_exists', 'value', toUInt64(1)); -DROP DICTIONARY dictdb.dict_exists; -DROP TABLE dictdb.table_for_dict; -DROP DATABASE dictdb; +DROP DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_exists; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index f2b30e05040..9d34470c38d 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ 
b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail -# Run the client. -$CLICKHOUSE_CLIENT --multiquery <<'EOF' +# NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used +$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF' DROP DATABASE IF EXISTS dictdb_01042; CREATE DATABASE dictdb_01042; CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple(); diff --git a/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql b/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql index 5d629d4e8db..4f078499972 100644 --- a/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql +++ b/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql @@ -1,13 +1,9 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS dictdb_01043; -CREATE DATABASE dictdb_01043; - -CREATE TABLE dictdb_01043.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO dictdb_01043.dicttbl VALUES (12, 'hello', '55:66:77'); +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.dicttbl VALUES (12, 'hello', '55:66:77'); -CREATE DICTIONARY dictdb_01043.dict +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict ( key Int64 DEFAULT -1, value_default String DEFAULT 'world', @@ -15,15 +11,13 @@ CREATE DICTIONARY dictdb_01043.dict ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dicttbl' DB 'dictdb_01043')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dicttbl' DB currentDatabase())) LAYOUT(FLAT()) LIFETIME(1); -SELECT dictGetString('dictdb_01043.dict', 'value_default', toUInt64(12)); -SELECT dictGetString('dictdb_01043.dict', 'value_default', toUInt64(14)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_default', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_default', toUInt64(14)); -SELECT dictGetString('dictdb_01043.dict', 'value_expression', toUInt64(12)); -SELECT dictGetString('dictdb_01043.dict', 'value_expression', toUInt64(14)); - -DROP DATABASE IF EXISTS dictdb_01043; +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_expression', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_expression', toUInt64(14)); diff --git a/tests/queries/0_stateless/01045_dictionaries_restrictions.sql b/tests/queries/0_stateless/01045_dictionaries_restrictions.sql index d41be7482f8..b4dbd741767 100644 --- a/tests/queries/0_stateless/01045_dictionaries_restrictions.sql +++ b/tests/queries/0_stateless/01045_dictionaries_restrictions.sql @@ -1,10 +1,5 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS dictdb_01045; - -CREATE DATABASE dictdb_01045; - -CREATE DICTIONARY dictdb_01045.restricted_dict ( +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.restricted_dict ( key UInt64, value String ) @@ -14,10 +9,9 @@ LIFETIME(MIN 0 MAX 1) LAYOUT(CACHE(SIZE_IN_CELLS 10)); -- because of lazy load we can check only in dictGet query -select dictGetString('dictdb_01045.restricted_dict', 'value', toUInt64(1)); -- {serverError 482} +select dictGetString({CLICKHOUSE_DATABASE:String} || '.restricted_dict', 'value', 
toUInt64(1)); -- {serverError 482} select 'Ok.'; -DROP DICTIONARY IF EXISTS dictdb_01045.restricted_dict; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.restricted_dict; -DROP DATABASE IF EXISTS dictdb_01045; diff --git a/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh b/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh index 68c511b80ac..cd6501bbebf 100755 --- a/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh +++ b/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh @@ -21,7 +21,7 @@ function wait_mutation_to_start() ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_for_mutations" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_for_mutations(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k PARTITION BY modulo(k, 2)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_for_mutations(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k PARTITION BY modulo(k, 2) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES table_for_mutations" @@ -48,7 +48,7 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_for_mutations" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS replicated_table_for_mutations" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE replicated_table_for_mutations(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/replicated_table_for_mutations', '1') ORDER BY k PARTITION BY modulo(k, 2)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE replicated_table_for_mutations(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/replicated_table_for_mutations', '1') ORDER BY k PARTITION BY modulo(k, 2) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES replicated_table_for_mutations" diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference index 53df91c5523..bb4fb9ddb47 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference @@ -1,32 +1,32 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` 
Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE 
default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n 
`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index bf1ac254783..e292447512c 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -1,110 +1,109 @@ --- Tags: no-parallel SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; -DROP DATABASE IF EXISTS test_01047; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; -CREATE DATABASE test_01047 ENGINE=Ordinary; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -DROP TABLE IF EXISTS test_01047.mt; -DROP TABLE IF EXISTS test_01047.mt_2; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt; +DROP 
TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_2; -CREATE TABLE test_01047.mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE TABLE test_01047.mt_2(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt_2(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a), tumbleEnd(wid) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE 
IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---PARTITION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---JOIN---'; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY tumble({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER 
ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY tumble({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); 
+SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---PARTITION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---JOIN---'; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY 
hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY hop({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY hop({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY hop({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; -DROP TABLE test_01047.wv; -DROP TABLE test_01047.mt; -DROP TABLE test_01047.mt_2; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.mt; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.mt_2; diff --git a/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql b/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql index c77ab50ab8b..2458fe14981 100644 --- a/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql +++ b/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql @@ -5,9 +5,9 @@ DROP TABLE IF EXISTS table_for_synchronous_mutations2; SELECT 'Replicated'; -CREATE TABLE table_for_synchronous_mutations1(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '1') ORDER BY k; +CREATE TABLE table_for_synchronous_mutations1(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '1') ORDER BY k SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -CREATE TABLE table_for_synchronous_mutations2(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '2') ORDER BY k; +CREATE TABLE table_for_synchronous_mutations2(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '2') ORDER BY k SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO table_for_synchronous_mutations1 select number, number 
from numbers(100000); @@ -29,7 +29,7 @@ SELECT 'Normal'; DROP TABLE IF EXISTS table_for_synchronous_mutations_no_replication; -CREATE TABLE table_for_synchronous_mutations_no_replication(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k; +CREATE TABLE table_for_synchronous_mutations_no_replication(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO table_for_synchronous_mutations_no_replication select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql b/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql index ec440baf8d6..a790f384f7a 100644 --- a/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql +++ b/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql @@ -1,18 +1,17 @@ --- Tags: no-parallel -drop dictionary if exists default.test_dict_01051_d; -drop table if exists default.test_01051_d; -drop table if exists default.test_view_01051_d; +drop dictionary if exists {CLICKHOUSE_DATABASE:Identifier}.test_dict_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_view_01051_d; -create table default.test_01051_d (key UInt64, value String) engine = MergeTree order by key; -create view default.test_view_01051_d (key UInt64, value String) as select k2 + 1 as key, v2 || '_x' as value from (select key + 2 as k2, value || '_y' as v2 from default.test_01051_d); +create table {CLICKHOUSE_DATABASE:Identifier}.test_01051_d (key UInt64, value String) engine = MergeTree order by key; +create view {CLICKHOUSE_DATABASE:Identifier}.test_view_01051_d (key UInt64, value String) as select k2 + 1 as key, v2 || '_x' as value from (select key + 2 as k2, value || '_y' as v2 from test_01051_d); -insert into default.test_01051_d values (1, 'a'); +insert into {CLICKHOUSE_DATABASE:Identifier}.test_01051_d values (1, 'a'); -create dictionary default.test_dict_01051_d (key UInt64, value String) primary key key source(clickhouse(host 'localhost' port '9000' user 'default' password '' db 'default' table 'test_view_01051_d')) layout(flat()) lifetime(100500); +create dictionary {CLICKHOUSE_DATABASE:Identifier}.test_dict_01051_d (key UInt64, value String) primary key key source(clickhouse(host 'localhost' port '9000' user 'default' password '' db currentDatabase() table 'test_view_01051_d')) layout(flat()) lifetime(100500); -select dictGet('default.test_dict_01051_d', 'value', toUInt64(4)); +select dictGet({CLICKHOUSE_DATABASE:String} || '.test_dict_01051_d', 'value', toUInt64(4)); -drop dictionary if exists default.test_dict_01051_d; -drop table if exists default.test_01051_d; -drop table if exists default.test_view_01051_d; +drop dictionary if exists {CLICKHOUSE_DATABASE:Identifier}.test_dict_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_view_01051_d; diff --git a/tests/queries/0_stateless/01051_system_stack_trace.reference b/tests/queries/0_stateless/01051_system_stack_trace.reference index 5142593dba6..6ef82c703e9 100644 --- a/tests/queries/0_stateless/01051_system_stack_trace.reference +++ b/tests/queries/0_stateless/01051_system_stack_trace.reference @@ -1,5 +1,5 @@ -- { echo } -SELECT count() > 0 FROM system.stack_trace WHERE query_id != ''; +SELECT count() > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler'; 1 
-- opimization for not reading /proc/self/task/{}/comm and avoid sending signal SELECT countIf(thread_id > 0) > 0 FROM system.stack_trace; @@ -8,7 +8,7 @@ SELECT countIf(thread_id > 0) > 0 FROM system.stack_trace; SELECT count(trace) > 0 FROM system.stack_trace WHERE length(trace) > 0 LIMIT 1; 1 -- optimization for query_id -SELECT length(query_id) > 0 FROM system.stack_trace WHERE query_id != '' LIMIT 1; +SELECT length(query_id) > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler' LIMIT 1; 1 -- optimization for thread_name SELECT length(thread_name) > 0 FROM system.stack_trace WHERE thread_name != '' LIMIT 1; diff --git a/tests/queries/0_stateless/01051_system_stack_trace.sql b/tests/queries/0_stateless/01051_system_stack_trace.sql index 7eb2a05dc87..b9b08f94221 100644 --- a/tests/queries/0_stateless/01051_system_stack_trace.sql +++ b/tests/queries/0_stateless/01051_system_stack_trace.sql @@ -1,13 +1,19 @@ -SET storage_system_stack_trace_pipe_read_timeout_ms = 1000; +-- Tags: no-parallel +-- Tag no-parallel: to decrease failure probability of collecting stack traces + +-- NOTE: It is OK to have bigger timeout here since: +-- a) this test is marked as no-parallel +-- b) there is a filter by thread_name, so it will send signals only to the threads with the name TCPHandler +SET storage_system_stack_trace_pipe_read_timeout_ms = 5000; -- { echo } -SELECT count() > 0 FROM system.stack_trace WHERE query_id != ''; +SELECT count() > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler'; -- opimization for not reading /proc/self/task/{}/comm and avoid sending signal SELECT countIf(thread_id > 0) > 0 FROM system.stack_trace; -- optimization for trace SELECT count(trace) > 0 FROM system.stack_trace WHERE length(trace) > 0 LIMIT 1; -- optimization for query_id -SELECT length(query_id) > 0 FROM system.stack_trace WHERE query_id != '' LIMIT 1; +SELECT length(query_id) > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler' LIMIT 1; -- optimization for thread_name SELECT length(thread_name) > 0 FROM system.stack_trace WHERE thread_name != '' LIMIT 1; -- enough rows (optimizations works "correctly") diff --git a/tests/queries/0_stateless/01053_drop_database_mat_view.sql b/tests/queries/0_stateless/01053_drop_database_mat_view.sql index 7651ac4885c..2642430eb05 100644 --- a/tests/queries/0_stateless/01053_drop_database_mat_view.sql +++ b/tests/queries/0_stateless/01053_drop_database_mat_view.sql @@ -1,14 +1,13 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS some_tests; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; -CREATE DATABASE some_tests ENGINE=Ordinary; -- Different inner table name with Atomic +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -- Different inner table name with Atomic set allow_deprecated_syntax_for_merge_tree=1; -create table some_tests.my_table ENGINE = MergeTree(day, (day), 8192) as select today() as day, 'mystring' as str; -show tables from some_tests; -create materialized view some_tests.my_materialized_view ENGINE = MergeTree(day, (day), 8192) as select * from some_tests.my_table; -show tables from some_tests; -select * from some_tests.my_materialized_view; +create table {CLICKHOUSE_DATABASE:Identifier}.my_table ENGINE = MergeTree(day, (day), 8192) as select today() as day, 'mystring' as str; +show tables from {CLICKHOUSE_DATABASE:Identifier}; +create materialized view {CLICKHOUSE_DATABASE:Identifier}.my_materialized_view ENGINE = 
MergeTree(day, (day), 8192) as select * from {CLICKHOUSE_DATABASE:Identifier}.my_table; +show tables from {CLICKHOUSE_DATABASE:Identifier}; +select * from {CLICKHOUSE_DATABASE:Identifier}.my_materialized_view; -DROP DATABASE some_tests; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh index fe757acfd06..8e28995980f 100755 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh @@ -16,14 +16,14 @@ DROP TABLE IF EXISTS wv; CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY hop(now('US/Samoa'), INTERVAL '5' SECOND, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY hop(now('US/Samoa'), INTERVAL '10' SECOND, INTERVAL '10' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1); EOF for _ in {1..100}; do $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count(*) FROM dst" | grep -q "1" && echo 'OK' && break - sleep .5 + sleep .2 done $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count FROM dst" diff --git a/tests/queries/0_stateless/01056_create_table_as.sql b/tests/queries/0_stateless/01056_create_table_as.sql index 6df660dba61..aa2dffb6e2d 100644 --- a/tests/queries/0_stateless/01056_create_table_as.sql +++ b/tests/queries/0_stateless/01056_create_table_as.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; @@ -23,9 +22,9 @@ DROP TABLE v; -- dictionary DROP DICTIONARY IF EXISTS dict; -DROP DATABASE if exists test_01056_dict_data; -CREATE DATABASE test_01056_dict_data; -CREATE TABLE test_01056_dict_data.dict_data (key Int, value UInt16) Engine=Memory(); +DROP DATABASE if exists {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.dict_data (key Int, value UInt16) Engine=Memory(); CREATE DICTIONARY dict ( `key` UInt64, @@ -34,7 +33,7 @@ CREATE DICTIONARY dict PRIMARY KEY key SOURCE(CLICKHOUSE( HOST '127.0.0.1' PORT tcpPort() - TABLE 'dict_data' DB 'test_01056_dict_data' USER 'default' PASSWORD '')) + TABLE 'dict_data' DB concat(currentDatabase(), '_1') USER 'default' PASSWORD '')) LIFETIME(MIN 0 MAX 0) LAYOUT(SPARSE_HASHED()); CREATE TABLE t3 AS dict; -- { serverError 80 } @@ -42,9 +41,9 @@ CREATE TABLE t3 AS dict; -- { serverError 80 } DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t3; DROP DICTIONARY dict; -DROP TABLE test_01056_dict_data.dict_data; +DROP TABLE {CLICKHOUSE_DATABASE_1:Identifier}.dict_data; -DROP DATABASE test_01056_dict_data; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1; SELECT x, toTypeName(x) FROM t1; diff --git a/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql b/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql index bfe928d7003..7a853f32d0f 100644 --- a/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql +++ b/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel DROP TABLE IF EXISTS test; -CREATE TABLE test Engine = MergeTree ORDER BY number AS SELECT number, toString(rand()) x from numbers(10000000); +CREATE TABLE test Engine = MergeTree ORDER BY number SETTINGS 
index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT number, toString(rand()) x from numbers(10000000); SELECT count() FROM test; diff --git a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql index 247e412484f..7ac70d41871 100644 --- a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql +++ b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql @@ -2,6 +2,9 @@ set mutations_sync = 2; +-- system.parts has server default, timezone cannot be randomized +set session_timezone = ''; + drop table if exists ttl; create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) diff --git a/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql b/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql index 992973c97e8..f92b6779587 100644 --- a/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql +++ b/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE old_syntax_01071_test (date Date, id UInt8) ENGINE = MergeTree(date, id, 8192); diff --git a/tests/queries/0_stateless/01073_attach_if_not_exists.sql b/tests/queries/0_stateless/01073_attach_if_not_exists.sql index 8bd356b466c..a99d5fb5041 100644 --- a/tests/queries/0_stateless/01073_attach_if_not_exists.sql +++ b/tests/queries/0_stateless/01073_attach_if_not_exists.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel CREATE TABLE aine (a Int) ENGINE = Log; ATTACH TABLE aine; -- { serverError 57 } diff --git a/tests/queries/0_stateless/01073_show_tables_not_like.sql b/tests/queries/0_stateless/01073_show_tables_not_like.sql index 405a6cb199b..9ff2afe7f27 100644 --- a/tests/queries/0_stateless/01073_show_tables_not_like.sql +++ b/tests/queries/0_stateless/01073_show_tables_not_like.sql @@ -1,11 +1,6 @@ --- Tags: no-parallel SHOW TABLES NOT LIKE '%'; -DROP DATABASE IF EXISTS test_01073; -CREATE DATABASE test_01073; -USE test_01073; - SHOW TABLES; SELECT '---'; CREATE TABLE test1 (x UInt8) ENGINE = Memory; @@ -22,14 +17,14 @@ SELECT '--'; SHOW TABLES NOT LIKE 'tes%2'; SELECT '---'; -SHOW TABLES FROM test_01073; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier}; SELECT '--'; -SHOW TABLES FROM test_01073 LIKE 'tes%'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIKE 'tes%'; SELECT '--'; -SHOW TABLES FROM test_01073 NOT LIKE 'tes%'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} NOT LIKE 'tes%'; SELECT '--'; -SHOW TABLES FROM test_01073 LIKE 'tes%1'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIKE 'tes%1'; SELECT '--'; -SHOW TABLES FROM test_01073 NOT LIKE 'tes%2'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} NOT LIKE 'tes%2'; -DROP DATABASE test_01073; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01077_mutations_index_consistency.sh b/tests/queries/0_stateless/01077_mutations_index_consistency.sh index c41eab62ecb..ffbe3692b64 100755 --- a/tests/queries/0_stateless/01077_mutations_index_consistency.sh +++ b/tests/queries/0_stateless/01077_mutations_index_consistency.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement" -$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date));" 
+$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" $CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Asia/Istanbul') + number%(23*3600) from numbers(1000000);" diff --git a/tests/queries/0_stateless/01079_order_by_pk.sql b/tests/queries/0_stateless/01079_order_by_pk.sql index 78e304b3118..0b442bf78c9 100644 --- a/tests/queries/0_stateless/01079_order_by_pk.sql +++ b/tests/queries/0_stateless/01079_order_by_pk.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS mt_pk; -CREATE TABLE mt_pk ENGINE = MergeTree PARTITION BY d ORDER BY x +CREATE TABLE mt_pk ENGINE = MergeTree PARTITION BY d ORDER BY x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT toDate(number % 32) AS d, number AS x FROM system.numbers LIMIT 10000010; SELECT x FROM mt_pk ORDER BY x ASC LIMIT 10000000, 1; diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index 26c2bf133ac..bfdea95fa9e 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -15,7 +15,7 @@ done for i in $(seq $REPLICAS); do - $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_add_drop_$i (key UInt64, value0 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_column', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000" + $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_add_drop_$i (key UInt64, value0 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_column', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192, index_granularity_bytes = '10Mi'" done $CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_1 SELECT number, number + 10 from numbers(100000)" diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index e508b77a0c2..8133f866c58 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest, no-upgrade-check +# Tags: zookeeper, no-parallel, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql b/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql index 1eb6e849851..d7b05bb7d78 100644 --- a/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql +++ b/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql @@ -1,40 +1,33 @@ --- Tags: no-parallel --- TODO: can't just remove default prefix, it breaks the test! 
+drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_table_01080; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_table_01080 (dim_key Int64, dim_id String) ENGINE = MergeTree Order by (dim_key); +insert into {CLICKHOUSE_DATABASE:Identifier}.test_table_01080 values(1,'test1'); -drop database if exists db_01080; -create database db_01080; +drop DICTIONARY if exists {CLICKHOUSE_DATABASE:Identifier}.test_dict_01080; -drop table if exists db_01080.test_table_01080; -CREATE TABLE db_01080.test_table_01080 (dim_key Int64, dim_id String) ENGINE = MergeTree Order by (dim_key); -insert into db_01080.test_table_01080 values(1,'test1'); - -drop DICTIONARY if exists db_01080.test_dict_01080; - -CREATE DICTIONARY db_01080.test_dict_01080 ( dim_key Int64, dim_id String ) +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.test_dict_01080 ( dim_key Int64, dim_id String ) PRIMARY KEY dim_key -source(clickhouse(host 'localhost' port tcpPort() user 'default' password '' db 'db_01080' table 'test_table_01080')) +source(clickhouse(host 'localhost' port tcpPort() user 'default' password '' db currentDatabase() table 'test_table_01080')) LIFETIME(MIN 0 MAX 0) LAYOUT(complex_key_hashed()); -SELECT dictGetString('db_01080.test_dict_01080', 'dim_id', tuple(toInt64(1))); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', tuple(toInt64(1))); -SELECT dictGetString('db_01080.test_dict_01080', 'dim_id', tuple(toInt64(0))); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', tuple(toInt64(0))); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(0)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(0)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(1)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(1)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(number)) as x from numbers(5)); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(number)) as x from numbers(5)); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(rand64()*0)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(rand64()*0)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(blockSize()=0)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(blockSize()=0)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(0))) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(0))) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(1))) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(1))) as x); -drop DICTIONARY db_01080.test_dict_01080; -drop table db_01080.test_table_01080; -drop database db_01080; +drop DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.test_dict_01080; +drop table {CLICKHOUSE_DATABASE:Identifier}.test_table_01080; diff --git 
a/tests/queries/0_stateless/01084_regexp_empty.sql b/tests/queries/0_stateless/01084_regexp_empty.sql index 3ccd4af80ab..5dd060ab58c 100644 --- a/tests/queries/0_stateless/01084_regexp_empty.sql +++ b/tests/queries/0_stateless/01084_regexp_empty.sql @@ -1,10 +1,9 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_01084; -CREATE DATABASE test_01084; -USE test_01084; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +USE {CLICKHOUSE_DATABASE_1:Identifier}; CREATE TABLE t (x UInt8) ENGINE = Memory; SELECT * FROM merge('', ''); -DROP DATABASE test_01084; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; diff --git a/tests/queries/0_stateless/01085_window_view_attach.sql b/tests/queries/0_stateless/01085_window_view_attach.sql index 051557a6a76..51a88a04f95 100644 --- a/tests/queries/0_stateless/01085_window_view_attach.sql +++ b/tests/queries/0_stateless/01085_window_view_attach.sql @@ -1,30 +1,29 @@ --- Tags: no-parallel SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; -DROP DATABASE IF EXISTS test_01085; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; -CREATE DATABASE test_01085 ENGINE=Ordinary; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -DROP TABLE IF EXISTS test_01085.mt; -DROP TABLE IF EXISTS test_01085.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; -CREATE TABLE test_01085.mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW test_01085.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM test_01085.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; -SHOW tables FROM test_01085; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -DROP TABLE test_01085.wv SYNC; -SHOW tables FROM test_01085; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.wv SYNC; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -CREATE WINDOW VIEW test_01085.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM test_01085.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; -DETACH TABLE test_01085.wv; -SHOW tables FROM test_01085; +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.wv; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -ATTACH TABLE test_01085.wv; -SHOW tables FROM test_01085; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.wv; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -DROP TABLE test_01085.wv SYNC; -SHOW tables FROM test_01085; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.wv SYNC; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01092_base64.reference b/tests/queries/0_stateless/01092_base64.reference deleted 
file mode 100644 index 4c41ac31946..00000000000 --- a/tests/queries/0_stateless/01092_base64.reference +++ /dev/null @@ -1 +0,0 @@ -TEcgT3B0aW11cw== diff --git a/tests/queries/0_stateless/01092_base64.sql b/tests/queries/0_stateless/01092_base64.sql deleted file mode 100644 index f50cf49d270..00000000000 --- a/tests/queries/0_stateless/01092_base64.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - --- This query reproduces a bug in TurboBase64 library. -select distinct base64Encode(materialize('LG Optimus')) from numbers(100); diff --git a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh index 860529a26e5..9ed78fd9f81 100755 --- a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh +++ b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh @@ -25,7 +25,7 @@ echo "SELECT COUNT() FROM $internal_table_name" | ${CLICKHOUSE_CURL} -m 60 -sSgk echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sum((number+GLOBAL+IN+(SELECT+number+AS+n+FROM+remote('127.0.0.2',+numbers(5))+WHERE+n+GLOBAL+IN+(SELECT+*+FROM+tmp_table)+AND+n+GLOBAL+NOT+IN+(SELECT+*+FROM+file)+))+AS+res),+sum(number*res)+FROM+remote('127.0.0.2',+numbers(10))" -echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+_1%2BsleepEachRow(3)+FROM+file" & +echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&function_sleep_max_microseconds_per_block=0&file_format=CSV&file_types=UInt64&query=SELECT+_1%2BsleepEachRow(3)+FROM+file" & wait ${CLICKHOUSE_CURL} -m 30 -sSk "$url" --data "DROP TEMPORARY TABLE tmp_table" diff --git a/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference b/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference index 0d8a65c3869..24f609fcf30 100644 --- a/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference +++ b/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference @@ -11,4 +11,11 @@ 3 Int32 2001-09-29 00:00:00 2001-09-28 00:00:00 +2001-09-29 03:25:45 DateTime +2001-09-28 20:34:15 DateTime +2001-09-29 03:25:45.000 DateTime64(3) +2001-09-28 20:34:15.000 DateTime64(3) 140400 Int32 +-23 +(1,1) +(1,-1) diff --git a/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql b/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql index 8c3068cd36b..feffd08562a 100644 --- a/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql +++ b/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql @@ -13,17 +13,35 @@ select (date '2001-10-01' - date '2001-09-28') x, toTypeName(x); select timestamp '2001-09-28 01:00:00' + interval 23 hour; select timestamp '2001-09-28 23:00:00' - interval 23 hour; --- TODO: return interval -select (timestamp '2001-12-29 03:00:00' - timestamp '2001-12-27 12:00:00') x, toTypeName(x); -- interval '1 day 15:00:00' +SET session_timezone = 'Europe/Amsterdam'; --- select -interval 23 hour; -- interval '-23:00:00' --- select interval 1 day + interval 1 hour; -- interval '1 day 01:00:00' --- select interval '1 day' - interval '1 hour'; -- interval '1 day -01:00:00' +select (date '2001-09-29' + interval 12345 second) x, toTypeName(x); +select (date '2001-09-29' + interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' + interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' + interval 
12345 nanosecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' - interval 12345 second) x, toTypeName(x); +select (date '2001-09-29' - interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' - interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' - interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') + interval 12345 second) x, toTypeName(x); +select (toDate32('2001-09-29') + interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') + interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') + interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') - interval 12345 second) x, toTypeName(x); +select (toDate32('2001-09-29') - interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') - interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') - interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } --- select date '2001-09-28' + time '03:00'; -- timestamp '2001-09-28 03:00:00' --- select time '01:00' + interval '3 hours'; -- time '04:00:00' --- select time '05:00' - time '03:00'; -- interval '02:00:00' --- select time '05:00' - interval '2 hours'; -- time '03:00:00' +select (timestamp '2001-12-29 03:00:00' - timestamp '2001-12-27 12:00:00') x, toTypeName(x); + +select -interval 23 hour; +select interval 1 day + interval 1 hour; +select interval '1 day' - interval '1 hour'; + +-- select date '2001-09-28' + time '03:00'; +-- select time '01:00' + interval '3 hours'; +-- select time '05:00' - time '03:00'; +-- select time '05:00' - interval '2 hours'; -- select 900 * interval '1 second'; -- interval '00:15:00' -- select (21 * interval '1 day') x, toTypeName(x); -- interval '21 days' diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index e4dad56bc29..bcaa70abbb5 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01107" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01107 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 $CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_dr $CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait diff --git 
a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index cc63af3676b..f61a60a0bda 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-upgrade-check +# Tags: race, zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -56,7 +56,7 @@ function create_table() if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ "create table $database.rmt_${RANDOM}_${RANDOM}_${RANDOM} (n int) engine=ReplicatedMergeTree order by tuple() -- suppress $CLICKHOUSE_TEST_ZOOKEEPER_PREFIX" \ - 2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE + 2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv TABLE_IS_DROPPED sleep 0.$RANDOM done } diff --git a/tests/queries/0_stateless/01114_database_atomic.reference b/tests/queries/0_stateless/01114_database_atomic.reference index 10a39087c57..93e89e3a2ec 100644 --- a/tests/queries/0_stateless/01114_database_atomic.reference +++ b/tests/queries/0_stateless/01114_database_atomic.reference @@ -1,3 +1,4 @@ +2 CREATE DATABASE test_01114_1\nENGINE = Atomic CREATE DATABASE test_01114_2\nENGINE = Atomic CREATE DATABASE test_01114_3\nENGINE = Ordinary diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 4a3d35e48b7..3e1f9eb1f43 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -13,6 +13,8 @@ DROP DATABASE IF EXISTS test_01114_2; DROP DATABASE IF EXISTS test_01114_3; " +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" + $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_2" $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE test_01114_3 ENGINE=Ordinary" @@ -49,8 +51,8 @@ $CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW $CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) sleep 1 # SELECT and INSERT should start before the following RENAMEs $CLICKHOUSE_CLIENT -nm -q " @@ -74,7 +76,7 @@ INSERT INTO 
test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5); SELECT count() FROM test_01114_1.mt " # result: 5 -$CLICKHOUSE_CLIENT -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" diff --git a/tests/queries/0_stateless/01121_remote_scalar_subquery.reference b/tests/queries/0_stateless/01121_remote_scalar_subquery.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/01121_remote_scalar_subquery.reference +++ b/tests/queries/0_stateless/01121_remote_scalar_subquery.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/01121_remote_scalar_subquery.sql b/tests/queries/0_stateless/01121_remote_scalar_subquery.sql index eada5ed4b59..2d0c842c5b4 100644 --- a/tests/queries/0_stateless/01121_remote_scalar_subquery.sql +++ b/tests/queries/0_stateless/01121_remote_scalar_subquery.sql @@ -1 +1,2 @@ SELECT (SELECT 1) FROM remote('127.0.0.{1,2}', system.one); +SELECT (SELECT 1) FROM remote('127.0.0.{1,2}'); diff --git a/tests/queries/0_stateless/01137_order_by_func.sql b/tests/queries/0_stateless/01137_order_by_func.sql index 682b2d391ce..536f2d1c61d 100644 --- a/tests/queries/0_stateless/01137_order_by_func.sql +++ b/tests/queries/0_stateless/01137_order_by_func.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS pk_func; -CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = MergeTree ORDER BY toDate(d); +CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = MergeTree ORDER BY toDate(d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO pk_func SELECT '2020-05-05 01:00:00', number FROM numbers(1000000); INSERT INTO pk_func SELECT '2020-05-06 01:00:00', number FROM numbers(1000000); @@ -10,7 +10,7 @@ SELECT * FROM pk_func ORDER BY toDate(d), ui LIMIT 5; DROP TABLE pk_func; DROP TABLE IF EXISTS nORX; -CREATE TABLE nORX (`A` Int64, `B` Int64, `V` Int64) ENGINE = MergeTree ORDER BY (A, negate(B)); +CREATE TABLE nORX (`A` Int64, `B` Int64, `V` Int64) ENGINE = MergeTree ORDER BY (A, negate(B)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO nORX SELECT 111, number, number FROM numbers(10000000); SELECT * diff --git a/tests/queries/0_stateless/01161_all_system_tables.sh b/tests/queries/0_stateless/01161_all_system_tables.sh index 6a72027478e..3ba59f9a424 100755 --- a/tests/queries/0_stateless/01161_all_system_tables.sh +++ b/tests/queries/0_stateless/01161_all_system_tables.sh @@ -18,8 +18,8 @@ function run_selects() { thread_num=$1 readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' 
|| name FROM system.tables - WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name!='zookeeper' and name!='merge_tree_metadata_cache' and name!='models' - AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num") + WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name != 'zookeeper' and name != 'models' + AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher'") for t in "${tables_arr[@]}" do diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference index 5331e30b899..32ad3f16abc 100644 --- a/tests/queries/0_stateless/01161_information_schema.reference +++ b/tests/queries/0_stateless/01161_information_schema.reference @@ -1,3 +1,7 @@ +COLUMNS +SCHEMATA +TABLES +VIEWS columns schemata tables @@ -6,6 +10,10 @@ COLUMNS SCHEMATA TABLES VIEWS +columns +schemata +tables +views INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N \N \N information_schema information_schema default \N \N \N \N default default mv VIEW @@ -25,3 +33,5 @@ default default v default v f 2 0 Float64 \N \N \N \N \N \N \N \N \N \N \N \N \ tmp tmp d 1 0 Date \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N Date tmp tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N DateTime tmp tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N \N \N \N \N \N \N \N DateTime64(3) +1 +1 diff --git a/tests/queries/0_stateless/01161_information_schema.sql b/tests/queries/0_stateless/01161_information_schema.sql index ed77ef1c1c2..68a3b011ced 100644 --- a/tests/queries/0_stateless/01161_information_schema.sql +++ b/tests/queries/0_stateless/01161_information_schema.sql @@ -1,20 +1,31 @@ -show tables from information_schema; +SHOW TABLES FROM information_schema; SHOW TABLES FROM INFORMATION_SCHEMA; -create table t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) engine=Memory; -create view v (n Nullable(Int32), f Float64) as select n, f from t; -create materialized view mv engine=Null as select * from system.one; -create temporary table tmp (d Date, dt DateTime, dtms DateTime64(3)); +DROP TABLE IF EXISTS t; +DROP VIEW IF EXISTS v; +DROP VIEW IF EXISTS mv; +DROP TABLE IF EXISTS tmp; + +CREATE TABLE t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) ENGINE=Memory; +CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t; +CREATE MATERIALIZED VIEW mv ENGINE=Null AS SELECT * FROM system.one; +CREATE TEMPORARY TABLE tmp (d Date, dt DateTime, dtms DateTime64(3)); -- FIXME #28687 -select * from information_schema.schemata where schema_name ilike 'information_schema'; +SELECT * FROM information_schema.schemata WHERE schema_name ilike 'information_schema'; -- SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%'; SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%'; -select * from information_schema.views where table_schema=currentDatabase(); +SELECT * FROM information_schema.views WHERE table_schema=currentDatabase(); -- SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%'; SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%'; -drop table t; -drop view v; +-- mixed 
upper/lowercase schema and table name: +SELECT count() FROM information_schema.TABLES WHERE table_schema=currentDatabase() AND table_name = 't'; +SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema=currentDatabase() AND table_name = 't'; +SELECT count() FROM INFORMATION_schema.tables WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE } +SELECT count() FROM information_schema.taBLES WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE } + drop view mv; +drop view v; +drop table t; diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index e645cb5aae7..07b39723c37 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "drop table if exists mt" -$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000" +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=5000" $CLICKHOUSE_CLIENT -q "insert into mt values (1)" $CLICKHOUSE_CLIENT -q "insert into mt values (2)" $CLICKHOUSE_CLIENT -q "insert into mt values (3)" diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference index 6ed281c757a..1af4bf8965c 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference @@ -1,2 +1,4 @@ 1 1 +1 +2 diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql index 924798b0050..a1db1c27bee 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql @@ -12,6 +12,8 @@ drop table rmt1; system sync replica rmt2; select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2'; drop table rmt2; +SYSTEM FLUSH LOGS; +select count() from system.text_log where logger_name like '%' || currentDatabase() || '%' and message ilike '%table with non-zero lost_part_count equal to%'; create table rmt1 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by tuple(); @@ -24,6 +26,8 @@ drop table rmt1; system sync replica rmt2; select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2'; drop table rmt2; +SYSTEM FLUSH LOGS; +select count() from system.text_log where logger_name like '%' || currentDatabase() || '%' and message ilike '%table with non-zero lost_part_count equal to%'; create table rmt1 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by (n, m, d, t); diff --git a/tests/queries/0_stateless/01168_mutations_isolation.reference b/tests/queries/0_stateless/01168_mutations_isolation.reference index f9ebd1c5f83..00859ce99b9 100644 --- a/tests/queries/0_stateless/01168_mutations_isolation.reference +++ b/tests/queries/0_stateless/01168_mutations_isolation.reference @@ -21,18 +21,20 @@ tx7 7 20 all_1_1_0_13 tx7 7 40 all_14_14_0 tx7 7 60 all_7_7_0_13 tx7 7 80 all_12_12_0_13 -tx7 8 20 all_1_14_2_13 -tx7 8 40 all_1_14_2_13 -tx7 8 60 
all_1_14_2_13 -tx7 8 80 all_1_14_2_13 +tx7 8 20 all_1_14_1_13 +tx7 8 40 all_1_14_1_13 +tx7 8 60 all_1_14_1_13 +tx7 8 80 all_1_14_1_13 Serialization error INVALID_TRANSACTION -tx11 9 21 all_1_14_2_17 -tx11 9 41 all_1_14_2_17 -tx11 9 61 all_1_14_2_17 -tx11 9 81 all_1_14_2_17 +tx11 9 21 all_1_14_1_17 +tx11 9 41 all_1_14_1_17 +tx11 9 61 all_1_14_1_17 +tx11 9 81 all_1_14_1_17 1 1 RUNNING -tx14 10 22 all_1_14_2_18 -tx14 10 42 all_1_14_2_18 -tx14 10 62 all_1_14_2_18 -tx14 10 82 all_1_14_2_18 +tx14 10 22 all_1_14_1_18 +tx14 10 42 all_1_14_1_18 +tx14 10 62 all_1_14_1_18 +tx14 10 82 all_1_14_1_18 +11 2 all_2_2_0 +11 10 all_1_1_0_3 diff --git a/tests/queries/0_stateless/01168_mutations_isolation.sh b/tests/queries/0_stateless/01168_mutations_isolation.sh index 5d014e030f1..c1d70189673 100755 --- a/tests/queries/0_stateless/01168_mutations_isolation.sh +++ b/tests/queries/0_stateless/01168_mutations_isolation.sh @@ -53,9 +53,6 @@ tx 6 "alter table mt update n=n*10 wh tx 6 "insert into mt values (40)" tx 6 "commit" -function accept_both_parts() { - sed 's/all_1_14_1_1/all_1_14_2_1/g' -} tx 7 "begin transaction" tx 7 "select 7, n, _part from mt order by n" @@ -64,7 +61,7 @@ tx_async 8 "alter table mt update n = 0 whe $CLICKHOUSE_CLIENT -q "kill mutation where database=currentDatabase() and mutation_id='mutation_15.txt' format Null" 2>&1| grep -Fv "probably it finished" tx_sync 8 "rollback" tx 7 "optimize table mt final" -tx 7 "select 8, n, _part from mt order by n" | accept_both_parts +tx 7 "select 8, n, _part from mt order by n" tx 10 "begin transaction" tx 10 "alter table mt update n = 0 where 1" | grep -Eo "Serialization error" | uniq tx 7 "alter table mt update n=n+1 where 1" @@ -74,7 +71,7 @@ tx 7 "commit" tx_async 11 "begin transaction" -tx_async 11 "select 9, n, _part from mt order by n" | accept_both_parts +tx_async 11 "select 9, n, _part from mt order by n" tx_async 12 "begin transaction" tx_async 11 "alter table mt update n=n+1 where 1" >/dev/null tx_async 12 "alter table mt update n=n+1 where 1" >/dev/null @@ -91,6 +88,19 @@ $CLICKHOUSE_CLIENT -q "kill transaction where tid=$tid_to_kill format Null" tx_sync 13 "rollback" tx 14 "begin transaction" -tx 14 "select 10, n, _part from mt order by n" | accept_both_parts +tx 14 "select 10, n, _part from mt order by n" $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table mt" + +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by tuple()" +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q "insert into mt values (1)" + +tx 15 "begin transaction" +tx 16 "begin transaction" +tx 16 "insert into mt values (2)" +tx 15 "alter table mt update n = 10*n where 1" +tx 15 "commit" +tx 16 "commit" +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q "select 11, n, _part from mt order by n" + +$CLICKHOUSE_CLIENT -q "drop table mt" diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index f2348c29146..d4884cbf457 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -242,10 +242,10 @@ wait ||: wait_for_queries_to_finish 40 -$CLICKHOUSE_CLIENT -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" -$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY 
_table" -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" -$CLICKHOUSE_CLIENT -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" $CLICKHOUSE_CLIENT --query "DROP TABLE src"; $CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference new file mode 100644 index 00000000000..12b941eab50 --- /dev/null +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference @@ -0,0 +1,8 @@ +1 1 +2 1 +3 1 +4 1 +1 +10 100 +1 1 1 +2 1 1 diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh new file mode 100755 index 00000000000..0d2016952d4 --- /dev/null +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# Tags: long, no-replicated-database, no-ordinary-database + +# shellcheck disable=SC2015 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; + +function thread_insert() +{ + set -e + val=1 + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($val, 1) */ ($val, 1); + INSERT INTO src VALUES /* ($val, 2) */ ($val, 2); + COMMIT;" + val=$((val+1)) + sleep 0.$RANDOM; + done +} + + +# NOTE +# ALTER PARTITION query stops merges, +# but serialization error is still possible if some merge was assigned (and committed) between BEGIN and ALTER. 
+function thread_partition_src_to_dst() +{ + set -e + count=0 + sum=0 + for i in {1..20}; do + out=$( + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, 3) */ ($i, 3); + INSERT INTO dst SELECT * FROM src; + ALTER TABLE src DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null; + COMMIT;" 2>&1) ||: + + echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select $i, 'src', type, n, _part from src order by type, n; + select $i, 'dst', type, n, _part from dst order by type, n; + rollback" ||: + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || count=$((count+1)) + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || sum=$((sum+i)) + done +} + +function thread_partition_dst_to_src() +{ + set -e + for i in {1..20}; do + action="ROLLBACK" + if (( i % 2 )); then + action="COMMIT" + fi + $CLICKHOUSE_CLIENT --multiquery --query " + SYSTEM STOP MERGES dst; + ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick + SYSTEM SYNC TRANSACTION LOG; + BEGIN TRANSACTION; + INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4); + INSERT INTO src SELECT * FROM dst; + ALTER TABLE dst DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SYSTEM START MERGES dst; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=4) != (toUInt8($i/2 + 1), (select sum(number) from numbers(1, $i) where number % 2 or number=$i))) FORMAT Null; + $action;" + done +} + +function thread_select() +{ + set -e + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + -- no duplicates + SELECT type, throwIf(count(n) != countDistinct(n)) FROM src GROUP BY type FORMAT Null; + SELECT type, throwIf(count(n) != countDistinct(n)) FROM dst GROUP BY type FORMAT Null; + -- rows inserted by thread_insert moved together + SET throw_on_unsupported_query_inside_transaction=0; + SELECT _table, throwIf(arraySort(groupArrayIf(n, type=1)) != arraySort(groupArrayIf(n, type=2))) FROM merge(currentDatabase(), '') GROUP BY _table FORMAT Null; + -- all rows are inserted in insert_thread + SELECT type, throwIf(count(n) != max(n)), throwIf(sum(n) != max(n)*(max(n)+1)/2) FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type FORMAT Null; + COMMIT;" + done +} + +thread_insert & PID_1=$! +thread_select & PID_2=$! + +thread_partition_src_to_dst & PID_3=$! +thread_partition_dst_to_src & PID_4=$! 
+wait $PID_3 && wait $PID_4 + +kill -TERM $PID_1 +kill -TERM $PID_2 +wait +wait_for_queries_to_finish + +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" + + +$CLICKHOUSE_CLIENT --query "DROP TABLE src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index d088d747ee8..24083d7d40b 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -29,13 +29,9 @@ 4 1 Commit 1 1 1 0 5 1 Begin 1 1 1 1 5 1 AddPart 1 1 1 1 all_5_5_0 -5 1 AddPart 1 1 1 1 all_1_1_1 5 1 LockPart 1 1 1 1 all_1_1_0 -5 1 AddPart 1 1 1 1 all_3_3_1 5 1 LockPart 1 1 1 1 all_3_3_0 -5 1 AddPart 1 1 1 1 all_4_4_1 5 1 LockPart 1 1 1 1 all_4_4_0 -5 1 AddPart 1 1 1 1 all_5_5_1 5 1 LockPart 1 1 1 1 all_5_5_0 5 1 UnlockPart 1 1 1 1 all_1_1_0 5 1 UnlockPart 1 1 1 1 all_3_3_0 diff --git a/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh b/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh index 09a43d13a42..5e115e6b3af 100755 --- a/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh +++ b/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists huge_strings" -$CLICKHOUSE_CLIENT -q "create table huge_strings (n UInt64, l UInt64, s String, h UInt64) engine=MergeTree order by n" +$CLICKHOUSE_CLIENT -q "create table huge_strings (n UInt64, l UInt64, s String, h UInt64) engine=MergeTree order by n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" # Timeouts are increased, because test can be slow with sanitizers and parallel runs. diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 8074e84f0ed..e9fed1dd6b2 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-upgrade-check +-- Tags: no-parallel DROP DATABASE IF EXISTS test_01191; CREATE DATABASE test_01191 ENGINE=Atomic; diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index dec1276111a..d1a7144e886 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -20,11 +20,11 @@ $CLICKHOUSE_CLIENT -q "SELECT engine, splitByChar('/', data_path)[-2], uuid, spl # 3. 
check RENAME don't wait for INSERT $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64) ENGINE=MergeTree ORDER BY n" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15000000 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO default" 2>&1| grep -F "already exists" > /dev/null && echo "ok" -$CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192_notexisting TO test_01192_renamed" 2>&1| grep -F "doesn't exist" > /dev/null && echo "ok" +$CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192_notexisting TO test_01192_renamed" 2>&1| grep -F "does not exist" > /dev/null && echo "ok" $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO test_01192_renamed" && echo "renamed" wait @@ -50,7 +50,7 @@ $CLICKHOUSE_CLIENT -q "RENAME TABLE test_01192.mt TO test_01192_atomic.mt, test_ # 6. check data after RENAME $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.mt" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.rmt" -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.mv" 2>&1| grep -F "doesn't exist" > /dev/null && echo "ok" +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.mv" 2>&1| grep -F "does not exist" > /dev/null && echo "ok" # 7. create dictionary and check it $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64, _part String) ENGINE=Memory" # mock @@ -60,7 +60,7 @@ $CLICKHOUSE_CLIENT -q "SELECT database, name, status, origin FROM system.diction $CLICKHOUSE_CLIENT -q "SELECT dictGet('test_01192_atomic.dict', '_part', toUInt64(1))" # 8. 
check RENAME don't wait for INSERT -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT --check_table_dependencies=0 -q "RENAME DATABASE test_01192 TO test_01192_renamed" 2>&1| grep -F "not supported" > /dev/null && echo "ok" diff --git a/tests/queries/0_stateless/01231_markdown_format.reference b/tests/queries/0_stateless/01231_markdown_format.reference index 9fb9901a053..19a8be906e9 100644 --- a/tests/queries/0_stateless/01231_markdown_format.reference +++ b/tests/queries/0_stateless/01231_markdown_format.reference @@ -3,3 +3,21 @@ | 1 | name1 | [1,2,3] | Some long string | name1 | 1.11 | | 2 | name2 | [4,5,60000] | \N | Another long string | 222.222222 | | 30000 | One more long string | [7,8,9] | name3 | name3 | 3.33 | +| a | +|:-| +| \!\#\$\%\&\(\*\+\,\-\.\/\:\<\=\>\?\@\[\^\`\{\|\}\~ | +| a | +|:-| +| \!\#\$\%\&\(\*\+\,\-\.\/\:\<\=\>\?\@\[\^\`\{\|\}\~ | +| a | +|:-| +| \!\#\$\%\&\(\*\+\,\-\.\/\:\<\=\>\?\@\[\^\`\{\|\}\~ | +| a | +|:-| +| !#$%&(*+,-./:<=>?@[^`{|}~ | +| a | +|:-| +| !#$%&(*+,-./:<=>?@[^`{|}~ | +| a | +|:-| +| !#$%&(*+,-./:<=>?@[^`{|}~ | diff --git a/tests/queries/0_stateless/01231_markdown_format.sql b/tests/queries/0_stateless/01231_markdown_format.sql index 65c65389e12..cc9ffa109ac 100644 --- a/tests/queries/0_stateless/01231_markdown_format.sql +++ b/tests/queries/0_stateless/01231_markdown_format.sql @@ -1,6 +1,17 @@ -DROP TABLE IF EXISTS makrdown; +DROP TABLE IF EXISTS markdown; CREATE TABLE markdown (id UInt32, name String, array Array(Int32), nullable Nullable(String), low_cardinality LowCardinality(String), decimal Decimal32(6)) ENGINE = Memory; INSERT INTO markdown VALUES (1, 'name1', [1,2,3], 'Some long string', 'name1', 1.11), (2, 'name2', [4,5,60000], Null, 'Another long string', 222.222222), (30000, 'One more long string', [7,8,9], 'name3', 'name3', 3.33); SELECT * FROM markdown FORMAT Markdown; -DROP TABLE IF EXISTS markdown +DROP TABLE IF EXISTS markdown; + + +SET output_format_markdown_escape_special_characters = true; +SELECT '!#$%&(*+,-./:<=>?@[^`{|}~' AS a FORMAT Markdown; +SELECT CAST(1 AS Enum('!#$%&(*+,-./:<=>?@[^`{|}~' = 1)) AS a FORMAT Markdown; +SELECT toFixedString('!#$%&(*+,-./:<=>?@[^`{|}~', 25) AS a FORMAT Markdown; + +SET output_format_markdown_escape_special_characters = false; +SELECT '!#$%&(*+,-./:<=>?@[^`{|}~' AS a FORMAT Markdown; +SELECT CAST(1 AS Enum('!#$%&(*+,-./:<=>?@[^`{|}~' = 1)) AS a FORMAT Markdown; +SELECT toFixedString('!#$%&(*+,-./:<=>?@[^`{|}~', 25) AS a FORMAT Markdown; diff --git a/tests/queries/0_stateless/01231_operator_null_in.sql b/tests/queries/0_stateless/01231_operator_null_in.sql index 27ab0bbd838..0424a995b3f 100644 --- a/tests/queries/0_stateless/01231_operator_null_in.sql +++ b/tests/queries/0_stateless/01231_operator_null_in.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS null_in; -CREATE TABLE null_in (dt DateTime, idx int, i Nullable(int), s Nullable(String)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +CREATE TABLE null_in (dt DateTime, idx int, i Nullable(int), s Nullable(String)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO null_in VALUES (1, 1, 1, '1') (2, 2, NULL, NULL) (3, 3, 3, '3') (4, 4, NULL, NULL) (5, 5, 5, '5'); @@ -81,7 
+81,7 @@ DROP TABLE IF EXISTS null_in; DROP TABLE IF EXISTS null_in_subquery; -CREATE TABLE null_in_subquery (dt DateTime, idx int, i Nullable(UInt64)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +CREATE TABLE null_in_subquery (dt DateTime, idx int, i Nullable(UInt64)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO null_in_subquery SELECT number % 3, number, number FROM system.numbers LIMIT 99999; SELECT count() == 33333 FROM null_in_subquery WHERE i in (SELECT i FROM null_in_subquery WHERE dt = 0); @@ -111,7 +111,7 @@ DROP TABLE IF EXISTS null_in_subquery; DROP TABLE IF EXISTS null_in_tuple; -CREATE TABLE null_in_tuple (dt DateTime, idx int, t Tuple(Nullable(UInt64), Nullable(String))) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +CREATE TABLE null_in_tuple (dt DateTime, idx int, t Tuple(Nullable(UInt64), Nullable(String))) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO null_in_tuple VALUES (1, 1, (1, '1')) (2, 2, (2, NULL)) (3, 3, (NULL, '3')) (4, 4, (NULL, NULL)) SET transform_null_in = 0; diff --git a/tests/queries/0_stateless/01233_check_table_with_metadata_cache.reference b/tests/queries/0_stateless/01233_check_table_with_metadata_cache.reference deleted file mode 100644 index b773fc49ec3..00000000000 --- a/tests/queries/0_stateless/01233_check_table_with_metadata_cache.reference +++ /dev/null @@ -1,672 +0,0 @@ -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:false; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE 
test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:false; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE 
test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:true; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; 
-CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:true; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:false; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE 
test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:false; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE 
test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:true; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE 
test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Ordinary; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:true; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE 
test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:false; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings 
mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:false; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 
1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:true; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:false; use projection:true; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), 
('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:false; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 
where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:false; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, 
replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:true; use_compact_data_part:false -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 
8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -database engine:Atomic; table engine:ReplicatedMergeTree; use metadata cache:true; use projection:true; use_compact_data_part:true -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000); -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 
-ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 -TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache; -CHECK TABLE test_metadata_cache.check_part_metadata_cache; -1 diff --git a/tests/queries/0_stateless/01233_check_table_with_metadata_cache.sh b/tests/queries/0_stateless/01233_check_table_with_metadata_cache.sh deleted file mode 100755 index 67f11e58a68..00000000000 --- a/tests/queries/0_stateless/01233_check_table_with_metadata_cache.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, long, no-s3-storage, no-random-settings, no-parallel -# Tag no-fasttest: setting use_metadata_cache=true is not supported in fasttest, because clickhouse binary in fasttest is build without RocksDB. -# Tag no-random-settings: random settings significantly slow down test with debug build (alternative: add no-debug tag) -# To suppress Warning messages from CHECK TABLE -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -set -e - -table_engines=(ReplicatedMergeTree) -database_engines=(Ordinary Atomic) -use_metadata_caches=(false true) -use_projections=(false true) -use_compact_data_parts=(false true) - -for table_engine in "${table_engines[@]}"; do - for database_engine in "${database_engines[@]}"; do - for use_metadata_cache in "${use_metadata_caches[@]}"; do - for use_projection in "${use_projections[@]}"; do - for use_compact_data_part in "${use_compact_data_parts[@]}"; do - echo "database engine:${database_engine}; table engine:${table_engine}; use metadata cache:${use_metadata_cache}; use projection:${use_projection}; use_compact_data_part:${use_compact_data_part}" - - ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_metadata_cache.check_part_metadata_cache SYNC;" - ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS test_metadata_cache;" - ${CLICKHOUSE_CLIENT} --allow_deprecated_database_ordinary=1 --query "CREATE DATABASE test_metadata_cache ENGINE = ${database_engine};" - - table_engine_clause="" - if [[ "$table_engine" == "ReplicatedMergeTree" ]]; then - table_engine_clause="ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_metadata_cache/check_part_metadata_cache', 'r1')" - elif [[ "$table_engine" == "MergeTree" ]]; then - table_engine_clause="ENGINE MergeTree()" - fi - - projection_clause="" - if [[ "$use_projection" == "true" ]]; then - projection_clause=", projection p1 (select p, sum(k), sum(v1), sum(v2) group by p)" - fi - - compact_data_part_clause=", min_bytes_for_wide_part = 10485760" - if [[ $use_compact_data_part == "true" ]]; then - compact_data_part_clause=", min_bytes_for_wide_part = 0" - fi - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_metadata_cache.check_part_metadata_cache (p Date, k UInt64, v1 UInt64, v2 Int64${projection_clause}) $table_engine_clause PARTITION BY toYYYYMM(p) ORDER BY k 
settings use_metadata_cache = ${use_metadata_cache} ${compact_data_part_clause}" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Insert first batch of data. - ${CLICKHOUSE_CLIENT} --echo --query "INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000);" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Insert second batch of data. - ${CLICKHOUSE_CLIENT} --echo --query "INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-05-15', 5, 1000, 2000), ('2018-05-16', 6, 3000, 4000), ('2018-05-17', 7, 5000, 6000), ('2018-05-18', 8, 7000, 8000);" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # First update. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache update v1 = 2001 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Second update. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache update v2 = 4002 where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # First delete. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 1 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Second delete. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache delete where k = 8 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Insert third batch of data. - ${CLICKHOUSE_CLIENT} --echo --query "INSERT INTO test_metadata_cache.check_part_metadata_cache (p, k, v1, v2) VALUES ('2018-06-15', 5, 1000, 2000), ('2018-06-16', 6, 3000, 4000), ('2018-06-17', 7, 5000, 6000), ('2018-06-18', 8, 7000, 8000);" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Drop one partition. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache drop partition 201805 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Add column. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache add column v3 UInt64 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Delete column. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache drop column v3 settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Add TTL. 
- ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 10 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Modify TTL. - ${CLICKHOUSE_CLIENT} --echo --query "ALTER TABLE test_metadata_cache.check_part_metadata_cache modify TTL p + INTERVAL 15 YEAR settings mutations_sync = 1, replication_alter_partitions_sync = 1;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - - # Truncate table. - ${CLICKHOUSE_CLIENT} --echo --query "TRUNCATE TABLE test_metadata_cache.check_part_metadata_cache;" - ${CLICKHOUSE_CLIENT} --echo --query "CHECK TABLE test_metadata_cache.check_part_metadata_cache;" - done - done - done - done -done diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index 9b0fe875416..26d3dd8acd4 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -10,7 +10,7 @@ set -o pipefail # This is needed to keep at least one running query for user for the time of test. # (1k http queries takes ~1 second, let's run for 5x more to avoid flaps) -${CLICKHOUSE_CLIENT} --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & +${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5000000 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & # ignore "yes: standard output: Broken pipe" yes 'SELECT 1' 2>/dev/null | { diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql index ac507d94b69..36bcaae383f 100644 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ b/tests/queries/0_stateless/01246_buffer_flush.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET function_sleep_max_microseconds_per_block = 4000000; + drop table if exists data_01256; drop table if exists buffer_01256; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 4cca0ceb4e3..abdda9aa048 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -104,6 +104,7 @@ SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP QUERY CACHE ['SYSTEM DROP QUERY','DROP QUERY CACHE','DROP QUERY'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FILESYSTEM CACHE ['SYSTEM DROP FILESYSTEM CACHE','DROP FILESYSTEM CACHE'] GLOBAL SYSTEM DROP CACHE +SYSTEM SYNC FILESYSTEM CACHE ['SYSTEM REPAIR FILESYSTEM CACHE','REPAIR FILESYSTEM CACHE','SYNC FILESYSTEM CACHE'] GLOBAL SYSTEM SYSTEM DROP SCHEMA CACHE ['SYSTEM DROP SCHEMA CACHE','DROP SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP S3 CLIENT CACHE ['SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM @@ -119,6 +120,7 @@ SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP MERGES','START M SYSTEM TTL MERGES ['SYSTEM STOP TTL MERGES','SYSTEM START TTL MERGES','STOP TTL MERGES','START TTL MERGES'] TABLE SYSTEM SYSTEM FETCHES ['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','START FETCHES'] TABLE SYSTEM SYSTEM 
MOVES ['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES'] TABLE SYSTEM +SYSTEM PULLING REPLICATION LOG ['SYSTEM STOP PULLING REPLICATION LOG','SYSTEM START PULLING REPLICATION LOG'] TABLE SYSTEM SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS'] TABLE SYSTEM SENDS SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM @@ -133,10 +135,12 @@ SYSTEM SYNC TRANSACTION LOG ['SYNC TRANSACTION LOG'] GLOBAL SYSTEM SYSTEM SYNC FILE CACHE ['SYNC FILE CACHE'] GLOBAL SYSTEM SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH +SYSTEM FLUSH ASYNC INSERT QUEUE ['FLUSH ASYNC INSERT QUEUE'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM +SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL displaySecretsInShowAndSelect [] GLOBAL ALL diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql index ac1186284be..c1cec6ea212 100644 --- a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -5,7 +5,7 @@ SELECT parseDateTime64BestEffort('foo'); -- {serverError 41} SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {serverError 43} -- invalid scale parameter SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter -SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError 1000} -- unknown timezone +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError BAD_ARGUMENTS} -- unknown timezone SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 44} -- non-const precision SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone diff --git a/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql b/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql index 222a85094d0..1abe9bf8cd8 100644 --- a/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql +++ b/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS ES; -create table ES(A String) Engine=MergeTree order by tuple(); +create table ES(A String) Engine=MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ES select toString(number) from numbers(10000000); SET max_execution_time = 100, diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index eb89a5ed38c..b7c30b304bf 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference 
@@ -106,10 +106,10 @@ CREATE USER u2_01292 DEFAULT ROLE r1_01292, r2_01292 SETTINGS readonly = 1 CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 -- system.users -u1_01292 local directory plaintext_password {} [] ['localhost'] [] [] 1 [] [] -u2_01292 local directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] [] -u3_01292 local directory sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] -u4_01292 local directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292'] +u1_01292 local_directory plaintext_password {} [] ['localhost'] [] [] 1 [] [] +u2_01292 local_directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] [] +u3_01292 local_directory sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] +u4_01292 local_directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292'] -- system.settings_profile_elements \N u1_01292 \N 0 readonly 1 \N \N \N \N \N u2_01292 \N 0 \N \N \N \N \N default diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 3bfd454bb1f..25933777f9f 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -6,7 +6,7 @@ exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 set history_file $env(CLICKHOUSE_TMP)/$basename.history log_user 0 -set timeout 10 +set timeout 60 match_max 100000 expect_after { diff --git a/tests/queries/0_stateless/01293_create_role.reference b/tests/queries/0_stateless/01293_create_role.reference index 9b3c4eabd47..42f091bddd5 100644 --- a/tests/queries/0_stateless/01293_create_role.reference +++ b/tests/queries/0_stateless/01293_create_role.reference @@ -28,7 +28,7 @@ CREATE ROLE r2_01293 CREATE ROLE r1_01293 SETTINGS readonly = 1 CREATE ROLE r2_01293 SETTINGS readonly = 1 -- system.roles -r1_01293 local directory +r1_01293 local_directory -- system.settings_profile_elements \N \N r1_01293 0 readonly 1 \N \N \N \N \N \N r2_01293 0 \N \N \N \N \N default diff --git a/tests/queries/0_stateless/01293_create_role.sql b/tests/queries/0_stateless/01293_create_role.sql index f22edfeec3e..fd75d62964d 100644 --- a/tests/queries/0_stateless/01293_create_role.sql +++ b/tests/queries/0_stateless/01293_create_role.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP ROLE IF EXISTS r1_01293, r2_01293, r3_01293, r4_01293, r5_01293, r6_01293, r7_01293, r8_01293, r9_01293; DROP ROLE IF EXISTS r2_01293_renamed; DROP ROLE IF EXISTS r1_01293@'%', 'r2_01293@%.myhost.com'; diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql index 9997f18f61d..f14c0d64a7d 100644 --- a/tests/queries/0_stateless/01293_system_distribution_queue.sql +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -8,7 +8,7 @@ create table null_01293 (key Int) engine=Null(); create table dist_01293 as null_01293 engine=Distributed(test_cluster_two_shards, currentDatabase(), null_01293, key); -- no rows, since no active monitor -select * from system.distribution_queue; +select * from system.distribution_queue where database = currentDatabase(); select 'INSERT'; system stop distributed sends dist_01293; diff --git 
a/tests/queries/0_stateless/01294_create_settings_profile.reference b/tests/queries/0_stateless/01294_create_settings_profile.reference index dbb73bca851..a10d5758752 100644 --- a/tests/queries/0_stateless/01294_create_settings_profile.reference +++ b/tests/queries/0_stateless/01294_create_settings_profile.reference @@ -53,12 +53,12 @@ CREATE SETTINGS PROFILE s4_01294 SETTINGS INHERIT readonly, INHERIT readonly CREATE SETTINGS PROFILE s5_01294 SETTINGS INHERIT readonly, readonly = 1 CREATE SETTINGS PROFILE s6_01294 SETTINGS INHERIT readonly, readonly CONST -- system.settings_profiles -s1_01294 local directory 0 0 [] [] -s2_01294 local directory 1 0 ['r1_01294'] [] -s3_01294 local directory 1 0 ['r1_01294'] [] -s4_01294 local directory 1 0 ['r1_01294'] [] -s5_01294 local directory 3 0 ['u1_01294'] [] -s6_01294 local directory 0 1 [] ['r1_01294','u1_01294'] +s1_01294 local_directory 0 0 [] [] +s2_01294 local_directory 1 0 ['r1_01294'] [] +s3_01294 local_directory 1 0 ['r1_01294'] [] +s4_01294 local_directory 1 0 ['r1_01294'] [] +s5_01294 local_directory 3 0 ['u1_01294'] [] +s6_01294 local_directory 0 1 [] ['r1_01294','u1_01294'] -- system.settings_profile_elements s2_01294 \N \N 0 readonly 0 \N \N \N \N s3_01294 \N \N 0 max_memory_usage 5000000 4000000 6000000 CONST \N diff --git a/tests/queries/0_stateless/01294_create_settings_profile.sql b/tests/queries/0_stateless/01294_create_settings_profile.sql index 565b4e70367..f71eefa6975 100644 --- a/tests/queries/0_stateless/01294_create_settings_profile.sql +++ b/tests/queries/0_stateless/01294_create_settings_profile.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP SETTINGS PROFILE IF EXISTS s1_01294, s2_01294, s3_01294, s4_01294, s5_01294, s6_01294, s7_01294, s8_01294, s9_01294, s10_01294; DROP SETTINGS PROFILE IF EXISTS s2_01294_renamed; DROP USER IF EXISTS u1_01294; diff --git a/tests/queries/0_stateless/01295_create_row_policy.reference b/tests/queries/0_stateless/01295_create_row_policy.reference index 6e3169b7fec..d73d9752bc1 100644 --- a/tests/queries/0_stateless/01295_create_row_policy.reference +++ b/tests/queries/0_stateless/01295_create_row_policy.reference @@ -30,6 +30,6 @@ CREATE ROW POLICY p5_01295 ON db2.table2 FOR SELECT USING a = b CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 TO ALL CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 TO ALL -- system.row_policies -p1_01295 ON db.table p1_01295 db table local directory (a < b) AND (c > d) 0 0 [] [] -p2_01295 ON db.table p2_01295 db table local directory id = currentUser() 1 0 ['u1_01295'] [] -p3_01295 ON db.table p3_01295 db table local directory 1 0 1 [] ['r1_01295'] +p1_01295 ON db.table p1_01295 db table local_directory (a < b) AND (c > d) 0 0 [] [] +p2_01295 ON db.table p2_01295 db table local_directory id = currentUser() 1 0 ['u1_01295'] [] +p3_01295 ON db.table p3_01295 db table local_directory 1 0 1 [] ['r1_01295'] diff --git a/tests/queries/0_stateless/01295_create_row_policy.sql b/tests/queries/0_stateless/01295_create_row_policy.sql index b484d0ea0f3..5ccd815c89a 100644 --- a/tests/queries/0_stateless/01295_create_row_policy.sql +++ b/tests/queries/0_stateless/01295_create_row_policy.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP ROW POLICY IF EXISTS p1_01295, p2_01295, p3_01295, p4_01295, p5_01295, p6_01295, p7_01295, p8_01295, p9_01295, p10_01295 ON db.table; DROP ROW POLICY IF EXISTS p2_01295_renamed ON db.table; DROP ROW POLICY IF EXISTS p3_01295 ON db.table, db2.table2; diff --git a/tests/queries/0_stateless/01297_create_quota.reference 
b/tests/queries/0_stateless/01297_create_quota.reference index 6b844e0d2f6..308bbf79024 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -52,10 +52,10 @@ CREATE QUOTA q2_01297 FOR INTERVAL 1 day MAX errors = 5 CREATE QUOTA q1_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297 CREATE QUOTA q2_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297 -- system.quotas -q1_01297 local directory ['user_name'] [] 0 ['r1_01297'] [] -q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] [] -q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] [] -q4_01297 local directory [] [604800] 1 [] ['u1_01297'] +q1_01297 local_directory ['user_name'] [] 0 ['r1_01297'] [] +q2_01297 local_directory [] [5259492] 0 ['r1_01297','u1_01297'] [] +q3_01297 local_directory ['client_key','user_name'] [5259492,15778476] 0 [] [] +q4_01297 local_directory [] [604800] 1 [] ['u1_01297'] -- system.quota_limits q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 \N q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N \N diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index bd503f40396..97148dc268e 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-s3-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01304_polygons_sym_difference.reference b/tests/queries/0_stateless/01304_polygons_sym_difference.reference index 9344410f192..828e0d5d4d4 100644 --- a/tests/queries/0_stateless/01304_polygons_sym_difference.reference +++ b/tests/queries/0_stateless/01304_polygons_sym_difference.reference @@ -1,7 +1,7 @@ [[[(1,2.9),(1,1),(2.9,1),(3,0),(0,0),(0,3),(1,2.9)]],[[(1,2.9),(1,4),(4,4),(4,1),(2.9,1),(2.6,2),(2,2.6),(1,2.9)]]] -------- MultiPolygon with Polygon -MULTIPOLYGON(((-20 -10.3067,-20 -20,-10 -20.8791,-10 -40,-40 -40,-40 -10,-20 -10.3067)),((20 10.3067,20 -20,-10 -20.8791,-10 -10,-20 -10.3067,-20 20,10 20.8791,10 10,20 10.3067)),((20 10.3067,20 20,10 20.8791,10 40,40 40,40 10,20 10.3067))) +[(-40,-40),(-40,-10),(-20,-20),(-20,-10.307),(-20,20),(-10,-40),(-10,-20.879),(-10,-10),(10,10),(10,20.879),(10,40),(20,-20),(20,10.307),(20,20),(40,10),(40,40)] -------- MultiPolygon with Polygon with Holes -MULTIPOLYGON(((-10 -20.8791,-20 -20,-20 -10.3067,-10 -10,-10 -20.8791)),((10 20.8791,20 20,20 10.3067,10 10,10 20.8791)),((50 50,50 -50,-50 -50,-50 50,50 50),(20 10.3067,40 10,40 40,10 40,10 20.8791,-20 20,-20 -10.3067,-40 -10,-40 -40,-10 -40,-10 -20.8791,20 -20,20 10.3067))) +[(-50,-50),(-50,50),(-40,-40),(-40,-10),(-20,-20),(-20,-10.307),(-20,20),(-10,-40),(-10,-20.879),(-10,-10),(10,10),(10,20.879),(10,40),(20,-20),(20,10.307),(20,20),(40,10),(40,40),(50,-50),(50,50)] -------- Polygon with Polygon with Holes -MULTIPOLYGON(((-20 -10.3067,-10 -10,-10 -20.8791,-20 -20,-20 -10.3067)),((10 20.8791,20 20,20 10.3067,10 10,10 20.8791)),((50 50,50 -50,-50 -50,-50 50,50 50),(20 10.3067,40 10,40 40,10 40,10 20.8791,-20 20,-20 -10.3067,-40 -10,-40 -40,-10 -40,-10 -20.8791,20 -20,20 10.3067))) +[(-50,-50),(-50,50),(-40,-40),(-40,-10),(-20,-20),(-20,-10.307),(-20,20),(-10,-40),(-10,-20.879),(-10,-10),(10,10),(10,20.879),(10,40),(20,-20),(20,10.307),(20,20),(40,10),(40,40),(50,-50),(50,50)] diff --git 
a/tests/queries/0_stateless/01304_polygons_sym_difference.sql b/tests/queries/0_stateless/01304_polygons_sym_difference.sql index c4129676b26..d0e022e1439 100644 --- a/tests/queries/0_stateless/01304_polygons_sym_difference.sql +++ b/tests/queries/0_stateless/01304_polygons_sym_difference.sql @@ -4,12 +4,12 @@ select polygonsSymDifferenceCartesian([[[(0, 0),(0, 3),(1, 2.9),(2, 2.6),(2.6, 2 select '-------- MultiPolygon with Polygon'; select wkt(polygonsSymDifferenceSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], 
[[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]])) format Null; -SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]], [[(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]]], [[[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]]))); +SELECT 
arrayDistinct(arraySort(arrayMap((x, y) -> (round(x, 3), round(y, 3)), arrayFlatten(polygonsSymDifferenceSpherical([[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]], [[(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]]], [[[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]]))))); select '-------- MultiPolygon with Polygon with Holes'; select wkt(polygonsSymDifferenceSpherical([[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], 
[[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], 
[(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format Null; -SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[(50.,50.),(50.,-50.),(-50.,-50.),(-50.,50.),(50.,50.)],[(10.,10.),(10.,40.),(40.,40.),(40.,10.),(10.,10.)],[(-10.,-10.),(-10.,-40.),(-40.,-40.),(-40.,-10.),(-10.,-10.)]], [[[(-20.,-20.),(-20.,20.),(20.,20.),(20.,-20.),(-20.,-20.)]]]))); +SELECT arrayDistinct(arraySort(arrayMap((x, y) -> (round(x, 3), round(y, 3)), arrayFlatten(polygonsSymDifferenceSpherical([[(50.,50.),(50.,-50.),(-50.,-50.),(-50.,50.),(50.,50.)],[(10.,10.),(10.,40.),(40.,40.),(40.,10.),(10.,10.)],[(-10.,-10.),(-10.,-40.),(-40.,-40.),(-40.,-10.),(-10.,-10.)]], [[[(-20.,-20.),(-20.,20.),(20.,20.),(20.,-20.),(-20.,-20.)]]]))))); select '-------- Polygon with Polygon with Holes'; select 
wkt(polygonsSymDifferenceSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], 
[(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format Null; -SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[(50., 50.), (50., -50.), (-50., -50.), (-50., 50.), (50., 50.)], [(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)], [(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]], [[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]))); +SELECT arrayDistinct(arraySort(arrayMap((x, y) -> (round(x, 3), round(y, 3)), arrayFlatten(polygonsSymDifferenceSpherical([[(50., 50.), (50., -50.), (-50., -50.), (-50., 50.), (50., 50.)], [(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)], [(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]], [[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]))))); diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference deleted file mode 100644 index 10f8bbfd392..00000000000 --- a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference +++ /dev/null @@ -1,58 +0,0 @@ -SELECT number -FROM -( - SELECT number - FROM - ( - SELECT DISTINCT number - FROM numbers(3) - ) -) -ORDER BY number ASC -0 -1 -2 -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM - ( - SELECT DISTINCT number - FROM numbers(3) - ORDER BY number ASC - ) - ORDER BY number ASC -) -ORDER BY number ASC -0 -1 -2 -SELECT number -FROM -( - SELECT number - FROM - ( - SELECT DISTINCT number % 2 AS number - FROM numbers(3) - ) -) -ORDER BY number ASC -0 -1 -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM - ( - SELECT DISTINCT number % 2 AS number - FROM numbers(3) - ORDER BY number ASC - ) - ORDER BY number ASC -) -ORDER BY number ASC -0 -1 diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql deleted file mode 100644 index 3b13b208eb5..00000000000 --- a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql +++ /dev/null @@ -1,123 +0,0 @@ -set optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - 
SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 0; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 0; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; diff --git a/tests/queries/0_stateless/01306_benchmark_json.sh b/tests/queries/0_stateless/01306_benchmark_json.sh deleted file mode 100755 index ae361826250..00000000000 --- a/tests/queries/0_stateless/01306_benchmark_json.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -set -e - -$CLICKHOUSE_BENCHMARK --iterations 10 --json "${CLICKHOUSE_TMP}"/out.json <<< "SELECT 1" 2>/dev/null && cat "${CLICKHOUSE_TMP}"/out.json | - $CLICKHOUSE_LOCAL --input-format JSONAsString --structure "s String" --query "SELECT isValidJSON(s) FROM table" diff --git a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql deleted file mode 100644 index 8ef1273c855..00000000000 --- a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql +++ /dev/null @@ -1,46 +0,0 @@ --- Tags: distributed - -set query_plan_remove_redundant_distinct = 1; -set optimize_duplicate_order_by_and_distinct = 0; -SET distributed_group_by_no_merge = 0; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 - SETTINGS distributed_group_by_no_merge = 1 -); - -SET distributed_group_by_no_merge = 1; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 -); - -set optimize_duplicate_order_by_and_distinct = 0; -SET distributed_group_by_no_merge = 0; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 - SETTINGS distributed_group_by_no_merge = 1 -); - -SET distributed_group_by_no_merge = 1; -set optimize_duplicate_order_by_and_distinct = 0; -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 -); diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index f9a2ec8a34c..42941b486d6 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-upgrade-check +# Tags: long, zookeeper, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 2e3080e9cfc..fcdfa2dec82 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 12bc3b09472..50ade3fad45 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query 
"INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index b3488d010c2..0cfb705185e 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo ' $CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "SYNTAX_ERROR" && echo 'OK' || echo 'FAIL' cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.reference b/tests/queries/0_stateless/01373_is_zero_or_null.reference index d9caaa2089a..baf51073fd4 100644 --- a/tests/queries/0_stateless/01373_is_zero_or_null.reference +++ b/tests/queries/0_stateless/01373_is_zero_or_null.reference @@ -20,3 +20,5 @@ world 3 --- 4 +--- +1 diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.sql b/tests/queries/0_stateless/01373_is_zero_or_null.sql index 32458dc9f62..dcb4f9649f9 100644 --- a/tests/queries/0_stateless/01373_is_zero_or_null.sql +++ b/tests/queries/0_stateless/01373_is_zero_or_null.sql @@ -27,3 +27,7 @@ SELECT count() FROM UNION ALL SELECT * FROM test WHERE isZeroOrNull(x != 'xyz') ); + +SELECT '---'; + +select isZeroOrNull(Null); diff --git a/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql b/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql index c5a874efe09..f1e1ab7c70f 100644 --- a/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql +++ b/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS tt_01373; CREATE TABLE tt_01373 (a Int64, d Int64, val Int64) -ENGINE = SummingMergeTree PARTITION BY (a) ORDER BY (d); +ENGINE = SummingMergeTree PARTITION BY (a) ORDER BY (d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; SYSTEM STOP MERGES tt_01373; diff --git a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql 
b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql
index 43c9fa43104..b6982910ace 100644
--- a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql
+++ b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql
@@ -1,4 +1,4 @@
--- Tags: zookeeper, no-upgrade-check
+-- Tags: zookeeper
 DROP TABLE IF EXISTS table_rename_with_ttl;
diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql
index 5321e03767f..854da04b334 100644
--- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql
+++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql
@@ -1,4 +1,4 @@
--- Tags: no-parallel, no-upgrade-check
+-- Tags: no-parallel
 DROP DATABASE IF EXISTS db_01391;
 CREATE DATABASE db_01391;
diff --git a/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh b/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh
index f92ab265eb8..dc88808d73b 100755
--- a/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh
+++ b/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh
@@ -18,19 +18,19 @@ $CLICKHOUSE_CLIENT --query "
     PARTITION BY date
 "
-$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, '42' FROM numbers(4)"
+$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, '42' FROM numbers(10)"
-$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, 'Hello' FROM numbers(4)"
+$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, 'Hello' FROM numbers(10)"
 $CLICKHOUSE_CLIENT --query "ALTER TABLE replicated_mutation_table UPDATE key = key + 1 WHERE sleepEachRow(1) == 0 SETTINGS mutations_sync = 2" 2>&1 | grep -o 'Mutation 0000000000 was killed' | head -n 1 &
 check_query="SELECT count() FROM system.mutations WHERE table='replicated_mutation_table' and database='$CLICKHOUSE_DATABASE' and mutation_id='0000000000'"
-query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1)
+query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query")
 while [ "$query_result" != "1" ]
 do
-    query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1)
+    query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query")
     sleep 0.1
 done
@@ -38,7 +38,7 @@ $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE table='replicated_mutation_table
 while [ "$query_result" != "0" ]
 do
-    query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1)
+    query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query")
     sleep 0.5
 done
@@ -49,11 +49,11 @@ $CLICKHOUSE_CLIENT --query "ALTER TABLE replicated_mutation_table MODIFY COLUMN
 check_query="SELECT type = 'UInt64' FROM system.columns WHERE table='replicated_mutation_table' and database='$CLICKHOUSE_DATABASE' and name='value'"
-query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1)
+query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query")
 while [ "$query_result" != "1" ]
 do
-    query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1)
+    query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query")
     sleep 0.5
 done
@@ -66,7 +66,7 @@ $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE table='replicated_mutation_table
 while [ "$query_result" != "0" ]
 do
-    query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1)
+    query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query")
sleep 0.5 done diff --git a/tests/queries/0_stateless/01419_merge_tree_settings_sanity_check.sql b/tests/queries/0_stateless/01419_merge_tree_settings_sanity_check.sql index 686594f435d..5655a8af3d6 100644 --- a/tests/queries/0_stateless/01419_merge_tree_settings_sanity_check.sql +++ b/tests/queries/0_stateless/01419_merge_tree_settings_sanity_check.sql @@ -22,6 +22,17 @@ PARTITION BY toYYYYMM(eventday) ORDER BY (eventday, user_id) SETTINGS number_of_free_entries_in_pool_to_lower_max_size_of_merge = 100; -- { serverError 36 } +CREATE TABLE mytable_local +( + created DateTime, + eventday Date, + user_id UInt32 +) +ENGINE = MergeTree() +PARTITION BY toYYYYMM(eventday) +ORDER BY (eventday, user_id) +SETTINGS number_of_free_entries_in_pool_to_execute_optimize_entire_partition = 100; -- { serverError 36 } + CREATE TABLE mytable_local ( created DateTime, diff --git a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql index 8febe8f2e44..4b31a86edfb 100644 --- a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql +++ b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql @@ -4,7 +4,7 @@ CREATE TABLE t_01411( str LowCardinality(String), arr Array(LowCardinality(String)) default [str] ) ENGINE = MergeTree() -ORDER BY tuple(); +ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(1000000); @@ -24,7 +24,7 @@ CREATE TABLE t_01411_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = MergeTree() -ORDER BY tuple(); +ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(1000000); diff --git a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference b/tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference deleted file mode 100644 index 82e887e1b92..00000000000 --- a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference +++ /dev/null @@ -1,136 +0,0 @@ -SELECT DISTINCT number -FROM numbers(1) -SELECT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) -) -SELECT DISTINCT number * 2 -FROM -( - SELECT DISTINCT - number * 2, - number - FROM numbers(1) -) -SELECT number -FROM -( - SELECT DISTINCT number * 2 AS number - FROM numbers(1) -) -SELECT - b, - a -FROM -( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) -) -SELECT DISTINCT a -FROM -( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) -) -SELECT a -FROM -( - SELECT DISTINCT a - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT DISTINCT a -FROM -( - SELECT - a, - b - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT - a, - b -FROM -( - SELECT - b, - a - FROM - ( - SELECT DISTINCT - number AS a, - number AS b - FROM numbers(1) - ) -) -SELECT - a, - b -FROM -( - SELECT - b, - a, - a + b - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT DISTINCT a -FROM -( - SELECT a - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) -) AS t1 -CROSS JOIN numbers(2) AS t2 -SELECT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) AS t1 - CROSS JOIN numbers(2) AS t2 
-) -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) - UNION ALL - SELECT DISTINCT number - FROM numbers(2) -) -0 -1 diff --git a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql b/tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql deleted file mode 100644 index 6fbf80a4dc3..00000000000 --- a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql +++ /dev/null @@ -1,32 +0,0 @@ -SET optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT number FROM numbers(1); -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1)); -EXPLAIN SYNTAX SELECT DISTINCT number * 2 FROM (SELECT DISTINCT number * 2, number FROM numbers(1)); -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number * 2 AS number FROM numbers(1)); -EXPLAIN SYNTAX SELECT DISTINCT b, a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT a, b FROM (SELECT DISTINCT b, a FROM (SELECT DISTINCT number a, number b FROM numbers(1))); -EXPLAIN SYNTAX SELECT DISTINCT a, b FROM (SELECT b, a, a + b FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1)) t1 CROSS JOIN numbers(2) t2; -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1) t1 CROSS JOIN numbers(2) t2); - -EXPLAIN SYNTAX SELECT DISTINCT number FROM -( - (SELECT DISTINCT number FROM numbers(1)) - UNION ALL - (SELECT DISTINCT number FROM numbers(2)) -); - --- - -SELECT DISTINCT number FROM -( - (SELECT DISTINCT number FROM numbers(1)) - UNION ALL - (SELECT DISTINCT number FROM numbers(2)) -) -ORDER BY number; diff --git a/tests/queries/0_stateless/01460_DistributedFilesToInsert.reference b/tests/queries/0_stateless/01460_DistributedFilesToInsert.reference deleted file mode 100644 index 7a123a39978..00000000000 --- a/tests/queries/0_stateless/01460_DistributedFilesToInsert.reference +++ /dev/null @@ -1,12 +0,0 @@ -INSERT -0 -0 -STOP/START DISTRIBUTED SENDS -1 -0 -FLUSH DISTRIBUTED -1 -0 -DROP TABLE -1 -0 diff --git a/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql b/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql deleted file mode 100644 index 9b7a98f30bc..00000000000 --- a/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql +++ /dev/null @@ -1,45 +0,0 @@ --- Tags: no-parallel, no-fasttest --- Tag no-fasttest: Look at DistributedFilesToInsert, so cannot run in parallel. - --- otherwise SYSTEM STOP DISTRIBUTED SENDS does not makes any effect (for localhost) --- (i.e. 
no .bin files and hence no sending is required) -set prefer_localhost_replica=0; - -drop table if exists data_01460; -drop table if exists dist_01460; - -create table data_01460 as system.one engine=Null(); -create table dist_01460 as data_01460 engine=Distributed(test_shard_localhost, currentDatabase(), data_01460) settings monitor_sleep_time_ms=50; - -select 'INSERT'; -select value from system.metrics where metric = 'DistributedFilesToInsert'; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -select 'STOP/START DISTRIBUTED SENDS'; -system stop distributed sends dist_01460; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; -system start distributed sends dist_01460; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -select 'FLUSH DISTRIBUTED'; -system stop distributed sends dist_01460; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; -system flush distributed dist_01460; -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -select 'DROP TABLE'; -system stop distributed sends dist_01460; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; -drop table dist_01460; -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -drop table data_01460; diff --git a/tests/queries/0_stateless/01472_obfuscator_uuid.sh b/tests/queries/0_stateless/01472_obfuscator_uuid.sh index 6654dcaad71..eae9c1e3081 100755 --- a/tests/queries/0_stateless/01472_obfuscator_uuid.sh +++ b/tests/queries/0_stateless/01472_obfuscator_uuid.sh @@ -4,9 +4,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS t_uuid" $CLICKHOUSE_CLIENT --query="CREATE TABLE t_uuid(Id UUID) ENGINE=MergeTree ORDER BY (Id)" $CLICKHOUSE_CLIENT --query="INSERT INTO t_uuid VALUES ('3f5ffba3-19ff-4f3d-8861-60ae6e1fc1aa'),('4bd62524-e33c-43e5-882d-f1d96cf5561e'),('7a8b45d2-c18b-4e8c-89eb-abf5bee88931'),('45bb7333-965b-4526-870e-4f941edb025b'),('a4e72d0e-f9fa-465e-8d9d-151b9ced94df'),('cb5818ab-83b5-48a8-94b0-5177e30176d9'),('701e8006-fc9f-4496-80ba-efa6817b917b'),('e0936acf-6e8f-42aa-8f56-d1363476eece'),('239bb790-5293-40df-92ae-472294b6e178'),('508d0e80-729f-4e3b-9336-4c5c8792f6be'),('94abef70-f2d6-4f7b-ad60-3889409f1dac'),('b6f1ec08-8473-4fa2-b134-73db040b0d82'),('7e54dcae-0bb4-4c4f-a636-54a705fb8b40'),('d1d258c2-a35f-4c00-abfa-8addbcbc5471'),('7c74fbd8-bf79-46ee-adfe-96271040a4f7'),('41e3a274-eea9-41d8-a128-de5a6658fcfd'),('a72dc048-f72f-470e-b0f9-60cfad6e1157'),('40634f4f-37bf-44e4-ac7c-6f024ad19990')" -$CLICKHOUSE_CLIENT --query="SELECT Id FROM t_uuid FORMAT TSV" > "${CLICKHOUSE_TMP}"/data.tsv +$CLICKHOUSE_CLIENT --query="SELECT Id FROM t_uuid ORDER BY (Id) FORMAT TSV" > "${CLICKHOUSE_TMP}"/data.tsv echo FROM RAW DATA && cat "${CLICKHOUSE_TMP}"/data.tsv echo TRANSFORMED TO && $CLICKHOUSE_OBFUSCATOR --structure "Id UUID" --input-format TSV --output-format TSV --seed dsrub < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index f3f6cc05058..5c8c47c9127 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug + select count() from ( select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index e3bc8cf6e72..3ce1c3aa131 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -4,7 +4,7 @@ SET allow_asynchronous_read_from_io_pool_for_merge_tree = 0; SET do_not_merge_across_partitions_select_final = 1; SET max_threads = 16; -CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); +CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2); INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2); diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 4b907d5ebb6..30beb29251e 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -44,6 +44,7 @@ SYSTEM STOP REPLICATION QUEUES execute_on_single_replica_r2; OPTIMIZE TABLE execute_on_single_replica_r1 FINAL SETTINGS replication_alter_partitions_sync=0; /* if we will check immediately we can find the log entry unchecked */ +SET 
function_sleep_max_microseconds_per_block = 10000000; SELECT * FROM numbers(4) where sleepEachRow(1); SELECT '****************************'; diff --git a/tests/queries/0_stateless/01550_create_map_type.sql b/tests/queries/0_stateless/01550_create_map_type.sql index 26bbf3c7dde..92362f5596b 100644 --- a/tests/queries/0_stateless/01550_create_map_type.sql +++ b/tests/queries/0_stateless/01550_create_map_type.sql @@ -9,14 +9,14 @@ drop table if exists table_map; drop table if exists table_map; -create table table_map (a Map(String, UInt64)) engine = MergeTree() order by a; +create table table_map (a Map(String, UInt64)) engine = MergeTree() order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map select map('key1', number, 'key2', number * 2) from numbers(1111, 3); select a['key1'], a['key2'] from table_map; drop table if exists table_map; -- MergeTree Engine drop table if exists table_map; -create table table_map (a Map(String, String), b String) engine = MergeTree() order by a; +create table table_map (a Map(String, String), b String) engine = MergeTree() order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values ({'name':'zhangsan', 'gender':'male'}, 'name'), ({'name':'lisi', 'gender':'female'}, 'gender'); select a[b] from table_map; select b from table_map where a = map('name','lisi', 'gender', 'female'); @@ -24,21 +24,21 @@ drop table if exists table_map; -- Big Integer type -create table table_map (d DATE, m Map(Int8, UInt256)) ENGINE = MergeTree() order by d; +create table table_map (d DATE, m Map(Int8, UInt256)) ENGINE = MergeTree() order by d SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values ('2020-01-01', map(1, 0, 2, 1)); select * from table_map; drop table table_map; -- Integer type -create table table_map (d DATE, m Map(Int8, Int8)) ENGINE = MergeTree() order by d; +create table table_map (d DATE, m Map(Int8, Int8)) ENGINE = MergeTree() order by d SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values ('2020-01-01', map(1, 0, 2, -1)); select * from table_map; drop table table_map; -- Unsigned Int type drop table if exists table_map; -create table table_map(a Map(UInt8, UInt64), b UInt8) Engine = MergeTree() order by b; +create table table_map(a Map(UInt8, UInt64), b UInt8) Engine = MergeTree() order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map select map(number, number+5), number from numbers(1111,4); select a[b] from table_map; drop table if exists table_map; @@ -46,7 +46,7 @@ drop table if exists table_map; -- Array Type drop table if exists table_map; -create table table_map(a Map(String, Array(UInt8))) Engine = MergeTree() order by a; +create table table_map(a Map(String, Array(UInt8))) Engine = MergeTree() order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values(map('k1', [1,2,3], 'k2', [4,5,6])), (map('k0', [], 'k1', [100,20,90])); insert into table_map select map('k1', [number, number + 2, number * 2]) from numbers(6); insert into table_map select map('k2', [number, number + 2, number * 2]) from numbers(6); @@ -56,7 +56,7 @@ drop table if exists table_map; SELECT CAST(([1, 2, 3], ['1', '2', 'foo']), 'Map(UInt8, String)') AS map, map[1]; CREATE TABLE table_map (n UInt32, m Map(String, Int)) -ENGINE = MergeTree ORDER BY n SETTINGS min_bytes_for_wide_part = 0; +ENGINE = MergeTree ORDER BY n 
SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; -- coversion from Tuple(Array(K), Array(V)) INSERT INTO table_map SELECT number, (arrayMap(x -> toString(x), range(number % 10 + 2)), range(number % 10 + 2)) FROM numbers(100000); @@ -67,7 +67,7 @@ SELECT sum(m['1']), sum(m['7']), sum(m['100']) FROM table_map; DROP TABLE IF EXISTS table_map; CREATE TABLE table_map (n UInt32, m Map(String, Int)) -ENGINE = MergeTree ORDER BY n; +ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- coversion from Tuple(Array(K), Array(V)) INSERT INTO table_map SELECT number, (arrayMap(x -> toString(x), range(number % 10 + 2)), range(number % 10 + 2)) FROM numbers(100000); diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference index 1d76d9bd631..443f6d3ae93 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference @@ -9,9 +9,9 @@ ExpressionTransform × 3 ExpressionTransform × 3 (ReadFromMergeTree) ExpressionTransform × 4 - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 MergingSortedTransform 2 → 1 ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index 61083c3ae14..472e042a18b 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- force data path with the user/pass in it set use_compact_format_in_distributed_parts_names=0; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 2b7fdfaa642..0be3012dc29 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -21,34 +21,34 @@ array-join lambda 1 optimize_read_in_order -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - ReadFromMergeTree (default.test_table) -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - ReadFromMergeTree (default.test_table) -Expression ((Projection + Before ORDER BY [lifted up part])) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - ReadFromMergeTree (default.test_table) +Expression + Limit + Sorting + Expression + ReadFromMergeTree +Expression + Limit + Sorting + Expression + ReadFromMergeTree +Expression + Limit + Sorting + Expression + ReadFromMergeTree optimize_aggregation_in_order -Expression ((Projection + Before ORDER BY)) +Expression Aggregating - Expression (Before GROUP BY) - ReadFromMergeTree (default.test_table) -Expression ((Projection + Before ORDER BY)) + Expression + ReadFromMergeTree +Expression Aggregating - Expression (Before GROUP BY) - ReadFromMergeTree (default.test_table) -Expression ((Projection + 
Before ORDER BY)) + Expression + ReadFromMergeTree +Expression Aggregating - Expression (Before GROUP BY) - ReadFromMergeTree (default.test_table) + Expression + ReadFromMergeTree second-index 1 1 diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index d59889c1af8..1da88abb759 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -81,15 +81,15 @@ SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) set max_rows_to_read = 0; SELECT 'optimize_read_in_order'; -EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 0; -EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1; -EXPLAIN SELECT toDate(timestamp) AS s FROM test_table ORDER BY toDate(timestamp) LIMIT 1 SETTINGS optimize_read_in_order = 1; +EXPLAIN description = 0 SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 0; +EXPLAIN description = 0 SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1; +EXPLAIN description = 0 SELECT toDate(timestamp) AS s FROM test_table ORDER BY toDate(timestamp) LIMIT 1 SETTINGS optimize_read_in_order = 1; SELECT 'optimize_aggregation_in_order'; -EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 0; -EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 1; -EXPLAIN SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp) SETTINGS optimize_aggregation_in_order = 1; +EXPLAIN description = 0 SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 0; +EXPLAIN description = 0 SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 1; +EXPLAIN description = 0 SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp) SETTINGS optimize_aggregation_in_order = 1; DROP TABLE test_table; diff --git a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh index 4a9b4beee5b..fcea7f57cd3 100755 --- a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh +++ b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 8939ea1111d..ce9c6f4589e 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -1193,10 +1193,10 @@ select count() over () from numbers(4) where number < 2; 2 -- floating point RANGE frame select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. 
following) + count(*) over (order by toFloat32(number) range 5 preceding), + count(*) over (order by toFloat64(number) range 5 preceding), + count(*) over (order by toFloat32(number) range between current row and 5 following), + count(*) over (order by toFloat64(number) range between current row and 5 following) from numbers(7) ; 1 1 6 6 diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 3c9c1f9cea7..07e323b3c40 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -474,10 +474,10 @@ select count() over () from numbers(4) where number < 2; -- floating point RANGE frame select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. following) + count(*) over (order by toFloat32(number) range 5 preceding), + count(*) over (order by toFloat64(number) range 5 preceding), + count(*) over (order by toFloat32(number) range between current row and 5 following), + count(*) over (order by toFloat64(number) range between current row and 5 following) from numbers(7) ; diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index 4911b7aa792..c63c651fb0b 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -7,7 +7,7 @@ set max_insert_threads = 4; create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) Engine = MergeTree partition by toYYYYMM(dt) -order by (brand_id, toStartOfHour(dt)); +order by (brand_id, toStartOfHour(dt)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into stack select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, diff --git a/tests/queries/0_stateless/01593_insert_settings.sql b/tests/queries/0_stateless/01593_insert_settings.sql index 88a58b2152e..6493ecf649b 100644 --- a/tests/queries/0_stateless/01593_insert_settings.sql +++ b/tests/queries/0_stateless/01593_insert_settings.sql @@ -3,6 +3,8 @@ create table data_01593 (key Int) engine=MergeTree() order by key partition by k insert into data_01593 select * from numbers_mt(10); insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; -- { serverError TOO_MANY_PARTS } +-- throw_on_max_partitions_per_insert_block=false means we'll just log that the limit was reached rather than throw +insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1, throw_on_max_partitions_per_insert_block=false; -- settings for INSERT is prefered insert into data_01593 settings max_partitions_per_insert_block=100 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; diff --git a/tests/queries/0_stateless/01600_detach_permanently.sh b/tests/queries/0_stateless/01600_detach_permanently.sh index c32a255448e..4b270265cdc 100755 --- a/tests/queries/0_stateless/01600_detach_permanently.sh +++ b/tests/queries/0_stateless/01600_detach_permanently.sh @@ -108,7 +108,7 @@ clickhouse_local "INSERT INTO db_ordinary.src SELECT * FROM numbers(10)" clickhouse_local "SELECT if(count() = 10, 'MV is working', 
'MV failed') FROM db_ordinary.src_mv_with_inner"
 clickhouse_local "DETACH VIEW db_ordinary.src_mv_with_inner PERMANENTLY; INSERT INTO db_ordinary.src SELECT * FROM numbers(10)" --stacktrace
-clickhouse_local "SELECT if(count() = 10, 'MV can be detached permanently', 'MV detach failed') FROM db_ordinary.src_mv_with_inner" 2>&1 | grep -c "db_ordinary.src_mv_with_inner doesn't exist"
+clickhouse_local "SELECT if(count() = 10, 'MV can be detached permanently', 'MV detach failed') FROM db_ordinary.src_mv_with_inner" 2>&1 | grep -c "db_ordinary.src_mv_with_inner does not exist"
 ## Quite silly: ATTACH MATERIALIZED VIEW don't work with short syntax (w/o select), but i can attach it using ATTACH TABLE ...
 clickhouse_local "ATTACH TABLE db_ordinary.src_mv_with_inner"
 clickhouse_local "INSERT INTO db_ordinary.src SELECT * FROM numbers(10)"
diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.reference b/tests/queries/0_stateless/01600_parts_types_metrics_long.reference
index 98fb6a68656..e8183f05f5d 100644
--- a/tests/queries/0_stateless/01600_parts_types_metrics_long.reference
+++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.reference
@@ -1,4 +1,3 @@
 1
 1
 1
-1
diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh
index 5f724e81042..3ffac772aa7 100755
--- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh
+++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-s3-storage
+# Tags: no-s3-storage, no-asan
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
@@ -10,14 +10,14 @@ set -o pipefail
 # NOTE: database = $CLICKHOUSE_DATABASE is unwanted
 verify_sql="SELECT
-    (SELECT sumIf(value, metric = 'PartsInMemory'), sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) =
-    (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide')
-    FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))"
+    (SELECT sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) =
+    (SELECT countIf(part_type = 'Compact'), countIf(part_type = 'Wide')
+    FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))"
 # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time.
 # So, there is inherent race condition (especially in fasttest that runs tests in parallel).
 #
-# But it should get expected result eventually.
+# But it should get the expected result eventually.
 # In case of test failure, this code will do infinite loop and timeout.
verify() { @@ -32,21 +32,16 @@ verify() } $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS data_01600" -# InMemory - [0..5] # Compact - (5..10] # Wide - >10 -$CLICKHOUSE_CLIENT --query="CREATE TABLE data_01600 (part_type String, key Int) ENGINE = MergeTree PARTITION BY part_type ORDER BY key SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=10" - -# InMemory -$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'InMemory', number FROM system.numbers LIMIT 1" -verify +$CLICKHOUSE_CLIENT --query="CREATE TABLE data_01600 (part_type String, key Int) ENGINE = MergeTree PARTITION BY part_type ORDER BY key SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=10, index_granularity = 8192, index_granularity_bytes = '10Mi'" # Compact -$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Compact', number FROM system.numbers LIMIT 6 OFFSET 1" +$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Compact', number FROM system.numbers LIMIT 6" verify # Wide -$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Wide', number FROM system.numbers LIMIT 11 OFFSET 7" +$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Wide', number FROM system.numbers LIMIT 11 OFFSET 6" verify # DROP and check diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index 7ef6eb7d3a2..c4750585eca 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -26,11 +26,11 @@ select cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_ kernel.biz.ss select '-- difference'; -- difference --- biz.ss is not in the default TLD list, hence: -select cutToFirstSignificantSubdomain('foo.kernel.biz.ss'); -- biz.ss -biz.ss -select cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss -kernel.biz.ss +-- dev.ss is not in the default TLD list, hence: +select cutToFirstSignificantSubdomain('foo.kernel.dev.ss'); -- dev.ss +dev.ss +select cutToFirstSignificantSubdomainCustom('foo.kernel.dev.ss', 'public_suffix_list'); -- kernel.dev.ss +dev.ss select '-- 3+level'; -- 3+level select cutToFirstSignificantSubdomainCustom('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at @@ -115,11 +115,11 @@ select cutToFirstSignificantSubdomainCustomRFC('foo.kernel.biz.ss', 'public_suff kernel.biz.ss select '-- difference'; -- difference --- biz.ss is not in the default TLD list, hence: -select cutToFirstSignificantSubdomainRFC('foo.kernel.biz.ss'); -- biz.ss -biz.ss -select cutToFirstSignificantSubdomainCustomRFC('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss -kernel.biz.ss +-- dev.ss is not in the default TLD list, hence: +select cutToFirstSignificantSubdomainRFC('foo.kernel.dev.ss'); -- dev.ss +dev.ss +select cutToFirstSignificantSubdomainCustomRFC('foo.kernel.dev.ss', 'public_suffix_list'); -- kernel.dev.ss +dev.ss select '-- 3+level'; -- 3+level select cutToFirstSignificantSubdomainCustomRFC('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at diff --git a/tests/queries/0_stateless/01601_custom_tld.sql.j2 b/tests/queries/0_stateless/01601_custom_tld.sql.j2 index 1e0982ea1b7..272d5fccd63 100644 --- a/tests/queries/0_stateless/01601_custom_tld.sql.j2 +++ b/tests/queries/0_stateless/01601_custom_tld.sql.j2 @@ -18,9 +18,9 @@ select firstSignificantSubdomainCustom{{ suffix }}('foo.kernel.biz.ss', 'public_ select 
cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss select '-- difference'; --- biz.ss is not in the default TLD list, hence: -select cutToFirstSignificantSubdomain{{ suffix }}('foo.kernel.biz.ss'); -- biz.ss -select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss +-- dev.ss is not in the default TLD list, hence: +select cutToFirstSignificantSubdomain{{ suffix }}('foo.kernel.dev.ss'); -- dev.ss +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.kernel.dev.ss', 'public_suffix_list'); -- kernel.dev.ss select '-- 3+level'; select cutToFirstSignificantSubdomainCustom{{ suffix }}('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at diff --git a/tests/queries/0_stateless/01601_proxy_protocol.reference b/tests/queries/0_stateless/01601_proxy_protocol.reference index a5c19667710..2201a1c6c08 100644 --- a/tests/queries/0_stateless/01601_proxy_protocol.reference +++ b/tests/queries/0_stateless/01601_proxy_protocol.reference @@ -1 +1,3 @@ Hello, world +Hello, world +Hello, world diff --git a/tests/queries/0_stateless/01601_proxy_protocol.sh b/tests/queries/0_stateless/01601_proxy_protocol.sh index 5f4ec6cc597..5c51a40a2c7 100755 --- a/tests/queries/0_stateless/01601_proxy_protocol.sh +++ b/tests/queries/0_stateless/01601_proxy_protocol.sh @@ -6,4 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# Old clickhouse-client (with version 23.8-) sends "ClickHouse client" and then "ClickHouse" or "ClickHouse ". +# For backward compatibility purposes, the server accepts both variants. +printf "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\21ClickHouse client\24\r\253\251\3\0\7default\0\4\1\0\1\0\0\t0.0.0.0:0\1\tmilovidov\21milovidov-desktop\nClickHouse\24\r\253\251\3\0\1\0\0\0\2\1\25SELECT 'Hello, world'\2\0\247\203\254l\325\\z|\265\254F\275\333\206\342\24\202\24\0\0\0\n\0\0\0\240\1\0\2\377\377\377\377\0\0\0" | nc "${CLICKHOUSE_HOST}" "${CLICKHOUSE_PORT_TCP_WITH_PROXY}" | head -c150 | grep --text -o -F 'Hello, world' printf "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\21ClickHouse client\24\r\253\251\3\0\7default\0\4\1\0\1\0\0\t0.0.0.0:0\1\tmilovidov\21milovidov-desktop\vClickHouse \24\r\253\251\3\0\1\0\0\0\2\1\25SELECT 'Hello, world'\2\0\247\203\254l\325\\z|\265\254F\275\333\206\342\24\202\24\0\0\0\n\0\0\0\240\1\0\2\377\377\377\377\0\0\0" | nc "${CLICKHOUSE_HOST}" "${CLICKHOUSE_PORT_TCP_WITH_PROXY}" | head -c150 | grep --text -o -F 'Hello, world' +printf "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\21ClickHouse client\24\r\253\251\3\0\7default\0\4\1\0\1\0\0\t0.0.0.0:0\1\tmilovidov\21milovidov-desktop\21ClickHouse client\24\r\253\251\3\0\1\0\0\0\2\1\25SELECT 'Hello, world'\2\0\247\203\254l\325\\z|\265\254F\275\333\206\342\24\202\24\0\0\0\n\0\0\0\240\1\0\2\377\377\377\377\0\0\0" | nc "${CLICKHOUSE_HOST}" "${CLICKHOUSE_PORT_TCP_WITH_PROXY}" | head -c150 | grep --text -o -F 'Hello, world' diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 0c8f80f2b63..1cf52c0288b 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,9 +1,11 @@ -- Tags: no-tsan -- Tag no-tsan: Too long for TSan +set enable_filesystem_cache=0; +set enable_filesystem_cache_on_write_operations=0; drop table if 
exists t; -create table t (x UInt64, s String) engine = MergeTree order by x; +create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t SELECT number, if(number < (8129 * 1024), arrayStringConcat(arrayMap(x -> toString(x), range(number % 128)), ' '), '') diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 4623c456475..f12a61055c4 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,4 +1,5 @@ --- Tags: no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug +-- no-tsan: too slow SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql b/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql index 0f2fbcaa76d..de3b79eec76 100644 --- a/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql +++ b/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql @@ -5,7 +5,8 @@ CREATE TABLE wide_to_comp (a Int, b Int, c Int) settings vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, min_bytes_for_wide_part = 0, - min_rows_for_wide_part = 0; + min_rows_for_wide_part = 0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; SYSTEM STOP merges wide_to_comp; diff --git a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql index b45a1974611..f4afcb8d55e 100644 --- a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql +++ b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-upgrade-check +-- Tags: long DROP TABLE IF EXISTS test_01640; DROP TABLE IF EXISTS restore_01640; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index be42a656c66..54ca55d2068 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -168,19 +168,23 @@ Filter 3 > one condition of filter is pushed down before LEFT JOIN Join +Join Filter column: notEquals(number, 1) Join > (analyzer) one condition of filter is pushed down before LEFT JOIN Join +Join Filter column: notEquals(number_0, 1_UInt8) 0 0 3 3 > one condition of filter is pushed down before INNER JOIN Join +Join Filter column: notEquals(number, 1) Join > (analyzer) one condition of filter is pushed down before INNER JOIN Join +Join Filter column: notEquals(number_0, 1_UInt8) 3 3 > filter is pushed down before UNION diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference index 76de47c4a46..4cf61460252 100644 --- a/tests/queries/0_stateless/01683_text_log_deadlock.reference +++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference @@ -1 +1 @@ -queries 25000 +queries: 25000 diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh index debb7a0017f..af7f348a6a2 100755 --- a/tests/queries/0_stateless/01683_text_log_deadlock.sh +++ 
b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 25000' +$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 25000' diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference index 682652152dc..de5a62159ef 100644 --- a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference @@ -13,6 +13,7 @@ SELECT covarPop(1, 1), covarSamp(1, 1), currentDatabase(), + currentDatabase(), dateDiff('DAY', toDate('2020-10-24'), toDate('2019-10-24')), exp(1), arrayFlatten([[1]]), diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql index 9b35087182c..dda2e045e76 100644 --- a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql @@ -1 +1 @@ -EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw; +EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw; diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference new file mode 100644 index 00000000000..25aa9dc5dec --- /dev/null +++ 
b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference @@ -0,0 +1,2 @@ +3 +950 990 500 2000 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql new file mode 100644 index 00000000000..e023c0991b3 --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS r; + +select finalizeAggregation(cast(quantileState(0)(arrayJoin([1,2,3])) as AggregateFunction(quantile(1), UInt8))); + +CREATE TABLE r ( + x String, + a LowCardinality(String), + q AggregateFunction(quantilesTiming(0.5, 0.95, 0.99), Int64), + s Int64, + PROJECTION p + (SELECT a, quantilesTimingMerge(0.5, 0.95, 0.99)(q), sum(s) GROUP BY a) +) Engine=SummingMergeTree order by (x, a); + +insert into r +select number%100 x, + 'x' a, + quantilesTimingState(0.5, 0.95, 0.99)(number::Int64) q, + sum(1) s +from numbers(1000) +group by x,a; + +SELECT + ifNotFinite(quantilesTimingMerge(0.95)(q)[1],0) as d1, + ifNotFinite(quantilesTimingMerge(0.99)(q)[1],0) as d2, + ifNotFinite(quantilesTimingMerge(0.50)(q)[1],0) as d3, + sum(s) +FROM cluster('test_cluster_two_shards', currentDatabase(), r) +WHERE a = 'x' +settings prefer_localhost_replica=0; + +DROP TABLE r; diff --git a/tests/queries/0_stateless/01710_normal_projection_format.reference b/tests/queries/0_stateless/01710_normal_projection_format.reference new file mode 100644 index 00000000000..318c7253724 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_format.reference @@ -0,0 +1 @@ +CREATE TABLE default.test\n(\n `uuid` FixedString(16),\n `id` Int32,\n `ns` FixedString(16),\n `dt` DateTime64(6),\n PROJECTION mtlog_proj_source_reference\n (\n SELECT *\n ORDER BY substring(ns, 1, 5)\n )\n)\nENGINE = MergeTree\nORDER BY (id, dt, uuid)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01710_normal_projection_format.sql b/tests/queries/0_stateless/01710_normal_projection_format.sql new file mode 100644 index 00000000000..6210be166b3 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_format.sql @@ -0,0 +1,17 @@ +DROP TABLE if exists test; + +CREATE TABLE test +( + uuid FixedString(16), + id int, + ns FixedString(16), + dt DateTime64(6), +) +ENGINE = MergeTree +ORDER BY (id, dt, uuid); + +ALTER TABLE test ADD PROJECTION mtlog_proj_source_reference (SELECT * ORDER BY substring(ns, 1, 5)); + +SHOW CREATE test; + +drop table test; diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql index 87e1a039488..32481be1bcd 100644 --- a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql +++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql @@ -18,6 +18,7 @@ DETACH TABLE i20203_1; ATTACH TABLE i20203_2; -- sleep 10 seconds +SET function_sleep_max_microseconds_per_block = 10000000; SELECT number from numbers(10) where sleepEachRow(1) Format Null; SELECT num_tries < 50 diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh index d83656e0e8c..adab3906e5b 100755 --- a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh @@ -54,7 +54,7 @@ if ! 
$CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null fi query_id="$CLICKHOUSE_DATABASE-$SECONDS" -$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & +$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null --function_sleep_max_microseconds_per_block 0 -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & client_pid=$! # wait until the query will appear in processlist (max 10 second) diff --git a/tests/queries/0_stateless/01739_index_hint.reference b/tests/queries/0_stateless/01739_index_hint.reference index 766dff8c7b0..21673bf698b 100644 --- a/tests/queries/0_stateless/01739_index_hint.reference +++ b/tests/queries/0_stateless/01739_index_hint.reference @@ -23,12 +23,12 @@ select * from tbl WHERE indexHint(p in (select toInt64(number) - 2 from numbers( 0 3 0 drop table tbl; drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); 487680 drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1; 100000 diff --git a/tests/queries/0_stateless/01739_index_hint.sql b/tests/queries/0_stateless/01739_index_hint.sql index 77c2760535d..cde46a5a2bf 100644 --- a/tests/queries/0_stateless/01739_index_hint.sql +++ b/tests/queries/0_stateless/01739_index_hint.sql @@ -18,7 +18,7 @@ drop table tbl; drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); @@ -26,7 +26,7 @@ SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); diff --git a/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql b/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql index 72ba6a036df..585640665d1 100644 --- a/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql +++ b/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS ttt01746; -CREATE TABLE ttt01746 (d Date, n UInt64) ENGINE = MergeTree() PARTITION BY toMonday(d) ORDER BY n; +CREATE TABLE ttt01746 (d Date, n UInt64) ENGINE = MergeTree() PARTITION BY toMonday(d) ORDER BY n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO ttt01746 SELECT toDate('2021-02-14') + (number % 30) AS d, number AS 
n FROM numbers(1500000); set optimize_move_to_prewhere=0; SELECT arraySort(x -> x.2, [tuple('a', 10)]) AS X FROM ttt01746 WHERE d >= toDate('2021-03-03') - 2 ORDER BY n LIMIT 1; diff --git a/tests/queries/bugs/01747_system_session_log_long.reference b/tests/queries/0_stateless/01747_system_session_log_long.reference similarity index 73% rename from tests/queries/bugs/01747_system_session_log_long.reference rename to tests/queries/0_stateless/01747_system_session_log_long.reference index 9ecf7e05421..e4f0b6f6076 100644 --- a/tests/queries/bugs/01747_system_session_log_long.reference +++ b/tests/queries/0_stateless/01747_system_session_log_long.reference @@ -4,215 +4,291 @@ TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - No profiles no roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - Two profiles, no roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - Two profiles and two simple roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # plaintext_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # plaintext_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # plaintext_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # sha256_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. 
+Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # sha256_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # sha256_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # double_sha1_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password # double_sha1_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password # double_sha1_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 
${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL Logout 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles PostgreSQL LoginFailure many 
${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many 
${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles PostgreSQL LoginFailure many 
invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many invalid_session_log_test_xml_user TCP LoginFailure 1 -invalid_session_log_test_xml_user HTTP LoginFailure 1 +invalid_session_log_test_xml_user HTTP LoginFailure many invalid_session_log_test_xml_user MySQL LoginFailure many +invalid_session_log_test_xml_user PostgreSQL LoginFailure many session_log_test_xml_user TCP LoginSuccess 1 session_log_test_xml_user TCP Logout 1 -session_log_test_xml_user HTTP LoginSuccess 1 -session_log_test_xml_user HTTP Logout 1 +session_log_test_xml_user HTTP LoginSuccess many +session_log_test_xml_user HTTP Logout many session_log_test_xml_user MySQL LoginSuccess 1 session_log_test_xml_user MySQL Logout 1 diff --git a/tests/queries/bugs/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh similarity index 75% rename from tests/queries/bugs/01747_system_session_log_long.sh rename to tests/queries/0_stateless/01747_system_session_log_long.sh index 9b127e0b48d..3ad33c28ee1 100755 --- a/tests/queries/bugs/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash # Tags: long, no-parallel, no-fasttest -# Tag no-fasttest: Accesses CH via mysql table function (which is unavailable) ################################################################################################## # Verify that login, logout, and login failure events are properly stored in system.session_log @@ -11,9 +10,8 @@ # Using multiple protocols # * native TCP protocol with CH client # * HTTP with CURL -# * MySQL - CH server accesses itself via mysql table function, query typically fails 
(unrelated) -# but auth should be performed properly. -# * PostgreSQL - CH server accesses itself via postgresql table function (currently out of order). +# * MySQL - CH server accesses itself via mysql table function. +# * PostgreSQL - CH server accesses itself via postgresql table function, but can't execute query (No LOGIN SUCCESS entry). # * gRPC - not done yet # # There is way to control how many time a query (e.g. via mysql table function) is retried @@ -35,8 +33,10 @@ set -eu # Since there is no way to cleanup system.session_log table, # make sure that we can identify log entries from this test by a random user name. -readonly BASE_USERNAME="session_log_test_user_$(cat /dev/urandom | tr -cd 'a-f0-9' | head -c 32)" -readonly TMP_QUERY_FILE=$(mktemp /tmp/tmp_query.log.XXXXXX) +BASE_USERNAME="session_log_test_user_$(tr -cd 'a-f0-9' < /dev/urandom | head -c 32)" +readonly BASE_USERNAME +TMP_QUERY_FILE=$(mktemp /tmp/tmp_query.log.XXXXXX) +readonly TMP_QUERY_FILE declare -a ALL_USERNAMES ALL_USERNAMES+=("${BASE_USERNAME}") @@ -53,7 +53,7 @@ function reportError() function executeQuery() { - ## Execute query (provided via heredoc or herestring) and print query in case of error. + # Execute query (provided via heredoc or herestring) and print query in case of error. trap 'rm -f ${TMP_QUERY_FILE}; trap - ERR RETURN' RETURN # Since we want to report with current values supplied to this function call # shellcheck disable=SC2064 @@ -82,7 +82,7 @@ trap "cleanup" EXIT function executeQueryExpectError() { cat - > "${TMP_QUERY_FILE}" - ! ${CLICKHOUSE_CLIENT} "${@}" --multiquery --queries-file "${TMP_QUERY_FILE}" 2>&1 | tee -a ${TMP_QUERY_FILE} + ! ${CLICKHOUSE_CLIENT} --multiquery --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a "${TMP_QUERY_FILE}" } function createUser() @@ -97,7 +97,8 @@ function createUser() elif [[ "${auth_type}" == "plaintext_password" ]] then - password="${password}" + # password="${password}" + : elif [[ "${auth_type}" == "sha256_password" ]] then @@ -121,6 +122,8 @@ function createUser() executeQuery < /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "INSERT INTO join_table_race VALUES ($RANDOM, '$RANDOM');"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..200}; do $CLICKHOUSE_CLIENT -q "SELECT count() FROM join_table_race FORMAT Null" > /dev/null 2> /dev/null; done & +for _ in {0..200}; do echo "SELECT count() FROM join_table_race FORMAT Null;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE join_table_race" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "TRUNCATE TABLE join_table_race;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "ALTER TABLE join_table_race DELETE WHERE id % 2 = 0" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "ALTER TABLE join_table_race DELETE WHERE id % 2 = 0;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & wait diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.reference b/tests/queries/0_stateless/01825_type_json_multiple_files.reference index b887abc8590..31b10035614 100644 --- a/tests/queries/0_stateless/01825_type_json_multiple_files.reference +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.reference @@ -5,9 +5,9 @@ {"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":100,"k5":0}} {"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":0,"k5":100}} Tuple(k0 Int8, k1 Int8, 
k2 Int8, k3 Int8, k4 Int8, k5 Int8) -{"data":{"k0":100,"k1":0,"k2":0}} -{"data":{"k0":0,"k1":100,"k2":0}} {"data":{"k0":0,"k1":0,"k2":100}} +{"data":{"k0":0,"k1":100,"k2":0}} +{"data":{"k0":100,"k1":0,"k2":0}} Tuple(k0 Int8, k1 Int8, k2 Int8) {"data":{"k1":100,"k3":0}} {"data":{"k1":0,"k3":100}} diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_type_json_multiple_files.sh index 91e7ffb5edf..089b7991784 100755 --- a/tests/queries/0_stateless/01825_type_json_multiple_files.sh +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.sh @@ -28,7 +28,7 @@ ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ ORDER BY _file LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 --allow_experimental_object_type 1 -${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file, data FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" diff --git a/tests/queries/0_stateless/01825_type_json_sparse.sql b/tests/queries/0_stateless/01825_type_json_sparse.sql index 343013cb3da..cc7c66382a3 100644 --- a/tests/queries/0_stateless/01825_type_json_sparse.sql +++ b/tests/queries/0_stateless/01825_type_json_sparse.sql @@ -7,7 +7,7 @@ SET allow_experimental_object_type = 1; CREATE TABLE t_json_sparse (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1, -min_bytes_for_wide_part = 0; +min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; SYSTEM STOP MERGES t_json_sparse; diff --git a/tests/queries/0_stateless/01848_http_insert_segfault.sh b/tests/queries/0_stateless/01848_http_insert_segfault.sh index 1f2e9eebcdc..6397068a77a 100755 --- a/tests/queries/0_stateless/01848_http_insert_segfault.sh +++ b/tests/queries/0_stateless/01848_http_insert_segfault.sh @@ -6,4 +6,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' 
|| echo 'FAIL' ||: +${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table does not exist" && echo 'Ok.' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/01861_explain_pipeline.reference b/tests/queries/0_stateless/01861_explain_pipeline.reference index 427b3eaefc0..d2370755a84 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.reference +++ b/tests/queries/0_stateless/01861_explain_pipeline.reference @@ -4,7 +4,7 @@ ExpressionTransform ExpressionTransform ReplacingSorted 2 → 1 ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 0 0 1 1 2 2 @@ -20,9 +20,9 @@ ExpressionTransform × 2 FilterSortedStreamByRange Description: filter values in [(5), +inf) ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 ReplacingSorted 2 → 1 FilterSortedStreamByRange × 2 Description: filter values in [-inf, (5)) ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 diff --git a/tests/queries/0_stateless/01861_explain_pipeline.sql b/tests/queries/0_stateless/01861_explain_pipeline.sql index aafecf57af1..93c82b6e265 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.sql +++ b/tests/queries/0_stateless/01861_explain_pipeline.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS test; -CREATE TABLE test(a Int, b Int) Engine=ReplacingMergeTree order by a; +CREATE TABLE test(a Int, b Int) Engine=ReplacingMergeTree order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test select number, number from numbers(5); INSERT INTO test select number, number from numbers(5,2); set max_threads =1; diff --git a/tests/queries/0_stateless/01880_remote_ipv6.sql b/tests/queries/0_stateless/01880_remote_ipv6.sql index 057b3ad7ec6..7f15449e556 100644 --- a/tests/queries/0_stateless/01880_remote_ipv6.sql +++ b/tests/queries/0_stateless/01880_remote_ipv6.sql @@ -10,3 +10,14 @@ SELECT * FROM remote('::1', system.one) FORMAT Null; -- { serverError 36 } SELECT * FROM remote('[::1][::1]', system.one) FORMAT Null; -- { serverError 36 } SELECT * FROM remote('[::1][::1', system.one) FORMAT Null; -- { serverError 36 } SELECT * FROM remote('[::1]::1]', system.one) FORMAT Null; -- { serverError 36 } + +SELECT * FROM remote('[::1]') FORMAT Null; +SELECT * FROM remote('[::1]:9000') FORMAT Null; + +SELECT * FROM remote('[::1') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('::1]') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('::1') FORMAT Null; -- { serverError 36 } + +SELECT * FROM remote('[::1][::1]') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('[::1][::1') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('[::1]::1]') FORMAT Null; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference index cb8e19ea2a0..407b126c4af 100644 --- 
a/tests/queries/0_stateless/01889_sql_json_functions.reference +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -77,6 +77,10 @@ SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$. [0, 1, 4, 0, -1, -4] SELECT JSON_QUERY('{"1key":1}', '$.1key'); [1] +SELECT JSON_QUERY('{"123":1}', '$.123'); +[1] +SELECT JSON_QUERY('{"123":1}', '$[123]'); + SELECT JSON_QUERY('{"hello":1}', '$[hello]'); [1] SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index 947b0171ec6..0683203fcea 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -43,6 +43,8 @@ SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => defa SELECT JSON_QUERY('', '$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); SELECT JSON_QUERY('{"1key":1}', '$.1key'); +SELECT JSON_QUERY('{"123":1}', '$.123'); +SELECT JSON_QUERY('{"123":1}', '$[123]'); SELECT JSON_QUERY('{"hello":1}', '$[hello]'); SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); diff --git a/tests/queries/0_stateless/01906_lc_in_bug.sql b/tests/queries/0_stateless/01906_lc_in_bug.sql index 581053e14e1..035e1fa155f 100644 --- a/tests/queries/0_stateless/01906_lc_in_bug.sql +++ b/tests/queries/0_stateless/01906_lc_in_bug.sql @@ -8,6 +8,6 @@ select count() as c, x in ('a', 'bb') as g from tab group by g order by c; drop table if exists tab; -- https://github.com/ClickHouse/ClickHouse/issues/44503 -CREATE TABLE test(key Int32) ENGINE = MergeTree ORDER BY (key); +CREATE TABLE test(key Int32) ENGINE = MergeTree ORDER BY (key) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test select intDiv(number,100) from numbers(10000000); SELECT COUNT() FROM test WHERE key <= 100000 AND (NOT (toLowCardinality('') IN (SELECT ''))); diff --git a/tests/queries/0_stateless/01913_quantile_deterministic.sh b/tests/queries/0_stateless/01913_quantile_deterministic.sh index 5a2c7279678..a9c57a61c33 100755 --- a/tests/queries/0_stateless/01913_quantile_deterministic.sh +++ b/tests/queries/0_stateless/01913_quantile_deterministic.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS d" -${CLICKHOUSE_CLIENT} --query "CREATE TABLE d (oid UInt64) ENGINE = MergeTree ORDER BY oid" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE d (oid UInt64) ENGINE = MergeTree ORDER BY oid SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} --min_insert_block_size_rows 0 --min_insert_block_size_bytes 0 --max_block_size 8192 --query "insert into d select * from numbers(1000000)" # In previous ClickHouse versions there was a mistake that makes quantileDeterministic functions not really deterministic (in edge cases). 
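A note on the SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' clause that keeps reappearing in the hunks above (01603, 01606, 01739, 01746, 01825, 01861, 01906, 01913): the stateless tests run under randomized MergeTree settings in CI (hence tags like no-random-merge-tree-settings), so tests whose reference output depends on how a table is split into granules pin both values explicitly; 8192 rows and 10 MiB are simply the defaults written out. Below is a minimal sketch of the pattern, not part of the patch: the table name granularity_demo is invented, and the preamble assumes the standard stateless-test shell_config.sh that defines ${CLICKHOUSE_CLIENT}.

```bash
#!/usr/bin/env bash
# Hypothetical sketch: pin index granularity so reference output stays stable.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS granularity_demo"
# Explicit per-table settings take precedence over the randomized defaults the
# test harness injects, so the granule layout (and any EXPLAIN or rows-read
# figures derived from it) is deterministic from run to run.
${CLICKHOUSE_CLIENT} --query "CREATE TABLE granularity_demo (id UInt64, s String)
    ENGINE = MergeTree ORDER BY id
    SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'"
${CLICKHOUSE_CLIENT} --query "INSERT INTO granularity_demo SELECT number, toString(number) FROM numbers(100000)"
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM granularity_demo WHERE id < 1000"
${CLICKHOUSE_CLIENT} --query "DROP TABLE granularity_demo"
```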
diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 3b0b429d396..54c7ae59894 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -14,6 +14,7 @@ log = None with client(name="client1>", log=log) as client1: client1.expect(prompt) - client1.send("SELECT number FROM numbers(100) FORMAT Null") - client1.expect("Progress: 100\.00 rows, 800\.00 B.*" + end_of_block) - client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec." + end_of_block) + client1.send("SELECT number FROM numbers(1000) FORMAT Null") + client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) + client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec.") + client1.expect("Peak memory usage: .*B" + end_of_block) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index ec5aa141859..4d7e79fae52 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" # Rate limit is chosen for operation to spent more than one second. -seq 1 1000 | pv --quiet --rate-limit 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" +seq 1 1000 | pv --quiet --rate-limit 500 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. 
${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index b9a7d17e955..d175d31846b 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -1,19 +1,19 @@ -- { echo } -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Merge sorted streams after aggregation stage for ORDER BY) +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; +Expression + Limit + Sorting Union - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Sorting + Expression + ReadFromStorage + ReadFromRemote +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; +Expression + Limit + Sorting Union - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) + Sorting + Expression + ReadFromStorage + ReadFromRemote diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql index 184e6321988..aee714a494e 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql @@ -3,5 +3,5 @@ set prefer_localhost_replica = 1; -- { echo } -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference deleted file mode 100644 index 9e388b62601..00000000000 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference +++ /dev/null @@ -1,3 +0,0 @@ -Loaded 1 queries. 
-I/O error: Too many open files -70 diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh deleted file mode 100755 index 7740ffcce7b..00000000000 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ubsan -# Tag no-ubsan: Limits RLIMIT_NOFILE, see comment in the test - -# shellcheck disable=SC2086 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# NOTE: Tests with limit for number of opened files cannot be run under UBsan. -# -# UBsan needs to create pipe each time it need to check the type: -# -# pipe() -# __sanitizer::IsAccessibleMemoryRange(unsigned long, unsigned long) -# __ubsan::checkDynamicType(void*, void*, unsigned long) + 271 -# HandleDynamicTypeCacheMiss(__ubsan::DynamicTypeCacheMissData*, unsigned long, unsigned long, __ubsan::ReportOptions) + 34 -# __ubsan_handle_dynamic_type_cache_miss_abort + 58 -# -# Obviously it will fail if RLIMIT_NOFILE exceeded (like in this test), and the UBsan will falsely report [1]: -# -# 01955_clickhouse_benchmark_connection_hang: [ FAIL ] 1.56 sec. - result differs with reference: -# --- /usr/share/clickhouse-test/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference 2021-07-21 11:14:58.000000000 +0300 -# +++ /tmp/clickhouse-test/0_stateless/01955_clickhouse_benchmark_connection_hang.stdout 2021-07-21 11:53:45.684050372 +0300 -# @@ -1,3 +1,22 @@ -# Loaded 1 queries. -# -I/O error: Too many open files -# -70 -# +../contrib/libcxx/include/memory:3212:19: runtime error: member call on address 0x00002939d5c0 which does not point to an object of type 'std::__1::__shared_weak_count' -# +0x00002939d5c0: note: object has invalid vptr -# + -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Failed to use and restart external symbolizer! 
-# + #0 0xfe86b57 (/usr/bin/clickhouse+0xfe86b57) -# + #1 0xfe83fd7 (/usr/bin/clickhouse+0xfe83fd7) -# + #2 0xfe89af4 (/usr/bin/clickhouse+0xfe89af4) -# + #3 0xfe81fa9 (/usr/bin/clickhouse+0xfe81fa9) -# + #4 0x1f377609 (/usr/bin/clickhouse+0x1f377609) -# + #5 0xfe7e2a1 (/usr/bin/clickhouse+0xfe7e2a1) -# + #6 0xfce1003 (/usr/bin/clickhouse+0xfce1003) -# + #7 0x7f3345bd30b2 (/lib/x86_64-linux-gnu/libc.so.6+0x270b2) -# + #8 0xfcbf0ed (/usr/bin/clickhouse+0xfcbf0ed) -# + -# +SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../contrib/libcxx/include/memory:3212:19 in -# +1 -# -# Stacktrace from lldb: -# -# thread #1, name = 'clickhouse-benc', stop reason = Dynamic type mismatch -# * frame #0: 0x000000000fffc070 clickhouse`__ubsan_on_report -# frame #1: 0x000000000fff6511 clickhouse`__ubsan::Diag::~Diag() + 209 -# frame #2: 0x000000000fffcb11 clickhouse`HandleDynamicTypeCacheMiss(__ubsan::DynamicTypeCacheMissData*, unsigned long, unsigned long, __ubsan::ReportOptions) + 609 -# frame #3: 0x000000000fffcf2a clickhouse`__ubsan_handle_dynamic_type_cache_miss_abort + 58 -# frame #4: 0x00000000101a33f8 clickhouse`std::__1::shared_ptr::PoolEntryHelper>::~shared_ptr(this=) + 152 at memory:3212 -# frame #5: 0x00000000101a267a clickhouse`PoolBase::Entry::~Entry(this=) + 26 at PoolBase.h:67 -# frame #6: 0x00000000101a0878 clickhouse`DB::ConnectionPool::get(this=, timeouts=0x00007fffffffc278, settings=, force_connected=true) + 664 at ConnectionPool.h:93 -# frame #7: 0x00000000101a6395 clickhouse`DB::Benchmark::runBenchmark(this=) + 981 at Benchmark.cpp:309 -# frame #8: 0x000000001019e84a clickhouse`DB::Benchmark::main(this=0x00007fffffffd8c8, (null)=) + 586 at Benchmark.cpp:128 -# frame #9: 0x000000001f5d028a clickhouse`Poco::Util::Application::run(this=0x00007fffffffd8c8) + 42 at Application.cpp:334 -# frame #10: 0x000000001019ab42 clickhouse`mainEntryClickHouseBenchmark(argc=, argv=) + 6978 at Benchmark.cpp:655 -# frame #11: 0x000000000fffdfc4 clickhouse`main(argc_=, argv_=) + 356 at main.cpp:366 -# frame #12: 0x00007ffff7de6d0a libc.so.6`__libc_start_main(main=(clickhouse`main at main.cpp:339), argc=7, argv=0x00007fffffffe1e8, init=, fini=, rtld_fini=, stack_end=0x00007fffffffe1d8) + 234 at libc-start.c:308 -# frame #13: 0x000000000ffdc0aa clickhouse`_start + 42 - -# Limit number of files to 50, and we will get EMFILE for some of socket() -prlimit --nofile=50 $CLICKHOUSE_BENCHMARK --iterations 1 --concurrency 50 --query 'select 1' 2>&1 -echo $? 
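The deleted 01955_clickhouse_benchmark_connection_hang.sh above exercised clickhouse-benchmark under an artificially low open-file limit, and its comment block explains why that cannot run under UBSan, which creates a pipe for each dynamic type check and therefore also hits the descriptor limit. For reference, a standalone sketch of the same technique, not part of the patch; the binary name, the limit of 50, and the benchmark arguments simply mirror the removed test and are illustrative:

```bash
#!/usr/bin/env bash
# Illustrative sketch: cap RLIMIT_NOFILE with prlimit so that opening many
# concurrent connections runs out of file descriptors (EMFILE), then print the
# exit status to confirm the client reports the error instead of hanging.
prlimit --nofile=50 clickhouse-benchmark --iterations 1 --concurrency 50 --query 'select 1' 2>&1
echo $?
```

The removed reference file shows the expected outcome of this pattern: the "I/O error: Too many open files" message followed by a non-zero exit status (70 in the old reference).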
diff --git a/tests/queries/0_stateless/02000_table_function_cluster_macros.reference b/tests/queries/0_stateless/02000_table_function_cluster_macros.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/02000_table_function_cluster_macros.reference +++ b/tests/queries/0_stateless/02000_table_function_cluster_macros.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/02000_table_function_cluster_macros.sql b/tests/queries/0_stateless/02000_table_function_cluster_macros.sql index f1bc1358b55..d133f5fdc2c 100644 --- a/tests/queries/0_stateless/02000_table_function_cluster_macros.sql +++ b/tests/queries/0_stateless/02000_table_function_cluster_macros.sql @@ -1,2 +1,4 @@ SELECT _shard_num FROM cluster("{default_cluster_macro}", system.one); +SELECT _shard_num FROM cluster("{default_cluster_macro}"); SELECT _shard_num FROM clusterAllReplicas("{default_cluster_macro}", system.one); +SELECT _shard_num FROM clusterAllReplicas("{default_cluster_macro}"); diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index a197d32a3b9..219fdf04472 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -1,226 +1,33 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import socket import os -import uuid +import sys -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def readStrict(s, size=1): - res = bytearray() - while size: - cur = s.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - -def readUInt(s, size=1): - res = readStrict(s, size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - -def readUInt8(s): - return readUInt(s) - - -def readUInt16(s): - return readUInt(s, 2) - - -def readUInt32(s): - return readUInt(s, 4) - - -def readUInt64(s): - return readUInt(s, 8) - - -def readVarUInt(s): - x = 0 - for i in range(9): - byte = readStrict(s)[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - -def readStringBinary(s): - size = readVarUInt(s) - s = readStrict(s, size) - return s.decode("utf-8") - - -def sendHello(s): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary("simple native protocol", ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("default", ba) # database - writeStringBinary("default", ba) # user - writeStringBinary("", ba) # pwd - s.sendall(ba) - - -def receiveHello(s): - p_type = readVarUInt(s) - assert p_type == 0 # Hello - server_name = readStringBinary(s) - # print("Server name: ", server_name) - server_version_major = readVarUInt(s) - # print("Major: ", server_version_major) - server_version_minor = readVarUInt(s) - # print("Minor: ", server_version_minor) - server_revision = readVarUInt(s) - # print("Revision: ", server_revision) - server_timezone = readStringBinary(s) - # print("Timezone: ", server_timezone) - 
server_display_name = readStringBinary(s) - # print("Display name: ", server_display_name) - server_version_patch = readVarUInt(s) - # print("Version patch: ", server_version_patch) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary("client_name", ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry - - -def sendQuery(s, query): - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - writeStringBinary("", ba) # No settings - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - s.sendall(ba) - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def sendEmptyBlock(s): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - s.sendall(ba) - - -def assertPacket(packet, expected): - assert packet == expected, packet - - -def readHeader(s): - packet_type = readVarUInt(s) - if packet_type == 2: # Exception - raise RuntimeError(readException(s)) - assertPacket(packet_type, 1) # Data - - readStringBinary(s) # external table name - # BlockInfo - assertPacket(readVarUInt(s), 1) # 1 - assertPacket(readUInt8(s), 0) # is_overflows - assertPacket(readVarUInt(s), 2) # 2 - assertPacket(readUInt32(s), 4294967295) # bucket_num - assertPacket(readVarUInt(s), 0) # 0 - columns = readVarUInt(s) # rows - rows = readVarUInt(s) # columns - print("Rows {} Columns {}".format(rows, columns)) - for _ in range(columns): - col_name = readStringBinary(s) - type_name = readStringBinary(s) - print("Column {} type {}".format(col_name, type_name)) - - -def readException(s): - code = readUInt32(s) - name = readStringBinary(s) - text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) +from tcp_client import ( + TCPClient, + CLICKHOUSE_DATABASE, + writeVarUInt, + writeStringBinary, + serializeBlockInfo, + assertPacket, +) def insertValidLowCardinalityRow(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -239,31 +46,25 @@ def insertValidLowCardinalityRow(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # 
UInt64 index (0 for 'hello') - s.sendall(ba) + client.send(ba) # Fin block - sendEmptyBlock(s) + client.sendEmptyBlock() - assertPacket(readVarUInt(s), 5) # End of stream - s.close() + assertPacket(client.readVarUInt(), 5) # End of stream def insertLowCardinalityRowWithIndexOverflow(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -282,29 +83,23 @@ def insertLowCardinalityRowWithIndexOverflow(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 7 + [1]) # UInt64 index (overflow) - s.sendall(ba) + client.send(ba) - assertPacket(readVarUInt(s), 2) - print(readException(s)) - s.close() + assertPacket(client.readVarUInt(), 2) # Exception + print(client.readException()) def insertLowCardinalityRowWithIncorrectDictType(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -323,29 +118,23 @@ def insertLowCardinalityRowWithIncorrectDictType(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (overflow) - s.sendall(ba) + client.send(ba) - assertPacket(readVarUInt(s), 2) - print(readException(s)) - s.close() + assertPacket(client.readVarUInt(), 2) # Exception + print(client.readException()) def insertLowCardinalityRowWithIncorrectAdditionalKeys(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -364,11 +153,10 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (0 for 'hello') - s.sendall(ba) + client.send(ba) - assertPacket(readVarUInt(s), 2) - print(readException(s)) - s.close() + assertPacket(client.readVarUInt(), 2) # Exception + print(client.readException()) def main(): diff --git a/tests/queries/0_stateless/02012_get_server_port.sql b/tests/queries/0_stateless/02012_get_server_port.sql index cc7fecb0bf0..2cf2014cfcc 100644 --- a/tests/queries/0_stateless/02012_get_server_port.sql +++ b/tests/queries/0_stateless/02012_get_server_port.sql @@ -1,3 +1,3 @@ select getServerPort('tcp_port'); -select getServerPort('unknown'); -- { serverError 170 } +select getServerPort('unknown'); -- { serverError CLUSTER_DOESNT_EXIST } diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference 
b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference index a9c37eb2ba5..d2e74fd07f0 100644 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference @@ -124,21 +124,21 @@ CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\n comment= Test table with comment change a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'new comment on a table\' +comment= new comment on a table remove a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192 +comment= add a comment back -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table detach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' re-attach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index 3abf5c52031..32ce1643d4e 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -1,5 +1,4 @@ 
#!/usr/bin/env bash -# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index e0f0114d030..0219a0421cb 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.reference b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.reference index e455643c01e..456f9d113be 100644 --- a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.reference +++ b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.reference @@ -1 +1,2 @@ default test_table value_index minmax minmax value 1 38 12 24 +default test_table value_index minmax minmax value 1 38 12 24 diff --git a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql index 1efb9cff6a4..07237c43bea 100644 --- a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql +++ b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql @@ -12,4 +12,10 @@ ORDER BY key SETTINGS compress_marks=false; INSERT INTO test_table VALUES (0, 'Value'); SELECT * FROM system.data_skipping_indices WHERE database = currentDatabase(); +ALTER TABLE test_table DROP INDEX value_index; +ALTER TABLE test_table ADD INDEX value_index value TYPE minmax GRANULARITY 1; +ALTER TABLE test_table MATERIALIZE INDEX value_index SETTINGS mutations_sync=1; + +SELECT * FROM system.data_skipping_indices WHERE database = currentDatabase(); + DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index 7df15ab33c4..6fbde71ff98 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-upgrade-check, no-fasttest +-- Tags: no-fasttest DROP TABLE IF EXISTS partslost_0; DROP TABLE IF EXISTS partslost_1; @@ -6,15 +6,18 @@ DROP TABLE IF EXISTS partslost_2; CREATE TABLE partslost_0 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '0') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE partslost_1 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '1') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE partslost_2 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '2') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, - cleanup_delay_period = 1, 
cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO partslost_0 SELECT toString(number) AS x from system.numbers LIMIT 10000; diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference index c4c0901b9df..432299e9556 100644 --- a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference @@ -8,5 +8,3 @@ OK OK OK OK -OK -OK diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh index d37155e8506..753d56fb424 100755 --- a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh @@ -9,8 +9,6 @@ ${CLICKHOUSE_LOCAL} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" ${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" -${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" - ${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" ${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" @@ -22,8 +20,6 @@ ${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" ${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" -${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" - ${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" ${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh index ba7aac94ddb..48f27a4d1f3 100755 --- a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh @@ -55,7 +55,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -c "CANNOT_SKIP_UNKNOWN_FIELD" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" 2>&1 | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -c "CANNOT_SKIP_UNKNOWN_FIELD" $CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" @@ -63,8 +63,8 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM 
test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" 2>&1 | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" 2>&1 | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" $CLICKHOUSE_CLIENT -q "DROP TABLE test_02102" diff --git a/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.reference b/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.reference index bf62392aeee..4ab036484c3 100644 --- a/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.reference +++ b/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.reference @@ -1 +1 @@ -biz.ss kernel.biz.ss +dev.ss kernel.dev.ss diff --git a/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.sh b/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.sh index 02a454957d4..13a20ad5ade 100755 --- a/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.sh +++ b/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.sh @@ -13,9 +13,9 @@ function clickhouse_local() $CLICKHOUSE_LOCAL "${opts[@]}" "$@" } -# -- biz.ss is not in the default TLD list, hence: +# -- dev.ss is not in the default TLD list, hence: clickhouse_local -q " select - cutToFirstSignificantSubdomain('foo.kernel.biz.ss'), - cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list') + cutToFirstSignificantSubdomain('foo.kernel.dev.ss'), + cutToFirstSignificantSubdomainCustom('foo.kernel.dev.ss', 'public_suffix_list') " |& grep -v -e 'Processing configuration file' diff --git a/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.tld.dat b/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.tld.dat index 510e6dd9ec0..e1b4e342c19 100644 --- a/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.tld.dat +++ b/tests/queries/0_stateless/02110_clickhouse_local_custom_tld.tld.dat @@ -1 +1 @@ -biz.ss +dev.ss diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 76a6b376a57..9b633314bd3 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -50,7 +50,8 @@ CREATE TABLE system.clusters `estimated_recovery_time` UInt32, `database_shard_name` String, `database_replica_name` String, - `is_active` Nullable(UInt8) + `is_active` Nullable(UInt8), + 
`name` String ) ENGINE = SystemClusters COMMENT 'SYSTEM TABLE is built on the fly.' @@ -144,6 +145,7 @@ CREATE TABLE system.detached_parts `partition_id` Nullable(String), `name` String, `bytes_on_disk` UInt64, + `modification_time` DateTime, `disk` String, `path` String, `reason` Nullable(String), @@ -262,7 +264,8 @@ CREATE TABLE system.events ( `event` String, `value` UInt64, - `description` String + `description` String, + `name` String ) ENGINE = SystemEvents COMMENT 'SYSTEM TABLE is built on the fly.' @@ -297,7 +300,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD 
FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED 
COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM SYNC FILESYSTEM CACHE' = 106, 'SYSTEM DROP SCHEMA CACHE' = 107, 'SYSTEM DROP S3 CLIENT CACHE' = 108, 'SYSTEM DROP CACHE' = 109, 'SYSTEM RELOAD CONFIG' = 110, 'SYSTEM RELOAD USERS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM PULLING REPLICATION LOG' = 122, 'SYSTEM DISTRIBUTED SENDS' = 123, 'SYSTEM REPLICATED SENDS' = 124, 'SYSTEM SENDS' = 125, 'SYSTEM REPLICATION QUEUES' = 126, 'SYSTEM DROP REPLICA' = 127, 'SYSTEM SYNC REPLICA' = 128, 'SYSTEM RESTART REPLICA' = 129, 'SYSTEM RESTORE REPLICA' = 130, 'SYSTEM WAIT LOADING PARTS' = 131, 'SYSTEM SYNC DATABASE REPLICA' = 132, 'SYSTEM SYNC TRANSACTION LOG' = 133, 'SYSTEM SYNC FILE CACHE' = 134, 'SYSTEM FLUSH DISTRIBUTED' = 135, 'SYSTEM FLUSH LOGS' = 136, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 137, 'SYSTEM FLUSH' = 138, 'SYSTEM THREAD FUZZER' = 139, 'SYSTEM UNFREEZE' = 140, 'SYSTEM FAILPOINT' = 141, 'SYSTEM LISTEN' = 142, 'SYSTEM' = 143, 'dictGet' = 144, 'displaySecretsInShowAndSelect' = 145, 'addressToLine' = 146, 'addressToLineWithInlines' = 147, 'addressToSymbol' = 148, 'demangle' = 149, 'INTROSPECTION' = 150, 'FILE' = 151, 'URL' = 152, 'REMOTE' = 153, 'MONGO' = 154, 'REDIS' = 155, 'MEILISEARCH' = 156, 'MYSQL' = 157, 'POSTGRES' = 158, 'SQLITE' = 159, 'ODBC' = 160, 'JDBC' = 161, 'HDFS' = 162, 'S3' = 163, 'HIVE' = 164, 'AZURE' = 165, 'SOURCES' = 166, 'CLUSTER' = 167, 'ALL' = 168, 'NONE' = 169), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -363,6 +366,7 @@ CREATE TABLE system.merges `source_part_paths` Array(String), `result_part_path` String, `partition_id` String, + `partition` String, `is_mutation` UInt8, `total_size_bytes_compressed` UInt64, `total_size_bytes_uncompressed` UInt64, @@ -383,7 +387,8 @@ CREATE TABLE system.metrics ( `metric` String, `value` Int64, - `description` String + `description` String, + `name` String ) ENGINE = SystemMetrics COMMENT 'SYSTEM TABLE is built on the fly.' @@ -584,10 +589,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 
132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM SYNC FILESYSTEM CACHE' = 106, 'SYSTEM DROP SCHEMA CACHE' = 107, 'SYSTEM DROP S3 CLIENT CACHE' = 108, 'SYSTEM DROP CACHE' = 109, 'SYSTEM RELOAD CONFIG' = 110, 'SYSTEM RELOAD USERS' = 111, 'SYSTEM RELOAD 
DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM PULLING REPLICATION LOG' = 122, 'SYSTEM DISTRIBUTED SENDS' = 123, 'SYSTEM REPLICATED SENDS' = 124, 'SYSTEM SENDS' = 125, 'SYSTEM REPLICATION QUEUES' = 126, 'SYSTEM DROP REPLICA' = 127, 'SYSTEM SYNC REPLICA' = 128, 'SYSTEM RESTART REPLICA' = 129, 'SYSTEM RESTORE REPLICA' = 130, 'SYSTEM WAIT LOADING PARTS' = 131, 'SYSTEM SYNC DATABASE REPLICA' = 132, 'SYSTEM SYNC TRANSACTION LOG' = 133, 'SYSTEM SYNC FILE CACHE' = 134, 'SYSTEM FLUSH DISTRIBUTED' = 135, 'SYSTEM FLUSH LOGS' = 136, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 137, 'SYSTEM FLUSH' = 138, 'SYSTEM THREAD FUZZER' = 139, 'SYSTEM UNFREEZE' = 140, 'SYSTEM FAILPOINT' = 141, 'SYSTEM LISTEN' = 142, 'SYSTEM' = 143, 'dictGet' = 144, 'displaySecretsInShowAndSelect' = 145, 'addressToLine' = 146, 'addressToLineWithInlines' = 147, 'addressToSymbol' = 148, 'demangle' = 149, 'INTROSPECTION' = 150, 'FILE' = 151, 'URL' = 152, 'REMOTE' = 153, 'MONGO' = 154, 'REDIS' = 155, 'MEILISEARCH' = 156, 'MYSQL' = 157, 'POSTGRES' = 158, 'SQLITE' = 159, 'ODBC' = 160, 'JDBC' = 161, 'HDFS' = 162, 'S3' = 163, 'HIVE' = 164, 'AZURE' = 165, 'SOURCES' = 166, 'CLUSTER' = 167, 'ALL' = 168, 'NONE' = 169), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 
80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY 
TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM SYNC FILESYSTEM CACHE' = 106, 'SYSTEM DROP SCHEMA CACHE' = 107, 'SYSTEM DROP S3 CLIENT CACHE' = 108, 'SYSTEM DROP CACHE' = 109, 'SYSTEM RELOAD CONFIG' = 110, 'SYSTEM RELOAD USERS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM PULLING REPLICATION LOG' = 122, 'SYSTEM DISTRIBUTED SENDS' = 123, 'SYSTEM REPLICATED SENDS' = 124, 'SYSTEM SENDS' = 125, 'SYSTEM REPLICATION QUEUES' = 126, 'SYSTEM DROP REPLICA' = 127, 'SYSTEM SYNC REPLICA' = 128, 'SYSTEM RESTART REPLICA' = 129, 'SYSTEM RESTORE REPLICA' = 130, 'SYSTEM WAIT LOADING PARTS' = 131, 'SYSTEM SYNC DATABASE REPLICA' = 132, 'SYSTEM SYNC TRANSACTION LOG' = 133, 'SYSTEM SYNC FILE CACHE' = 134, 'SYSTEM FLUSH DISTRIBUTED' = 135, 'SYSTEM FLUSH LOGS' = 136, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 137, 'SYSTEM FLUSH' = 138, 'SYSTEM THREAD FUZZER' = 139, 'SYSTEM UNFREEZE' = 140, 'SYSTEM FAILPOINT' = 141, 'SYSTEM LISTEN' = 142, 'SYSTEM' = 143, 'dictGet' = 144, 'displaySecretsInShowAndSelect' = 145, 'addressToLine' = 146, 'addressToLineWithInlines' = 147, 'addressToSymbol' = 148, 'demangle' = 149, 'INTROSPECTION' = 150, 'FILE' = 151, 'URL' = 152, 'REMOTE' = 153, 'MONGO' = 154, 'REDIS' = 155, 'MEILISEARCH' = 156, 'MYSQL' = 157, 'POSTGRES' = 158, 'SQLITE' = 159, 'ODBC' = 160, 'JDBC' = 161, 'HDFS' = 162, 'S3' = 163, 'HIVE' = 164, 'AZURE' = 165, 'SOURCES' = 166, 'CLUSTER' = 167, 'ALL' = 168, 'NONE' = 169)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
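The `.reference` hunks above and below record the expected `SHOW CREATE TABLE` output for system tables, so every newly added privilege or column shifts the enumerated values and the whole line has to be regenerated. As a purely illustrative way to produce such output against a running server (the host, port and output format here are assumptions, not part of the patch):

```bash
# Illustrative only: dump the server's current definition of a system table
# so it can be compared with the expected .reference content.
# Host and port are assumed local defaults.
clickhouse-client --host 127.0.0.1 --port 9000 \
    --query "SHOW CREATE TABLE system.privileges FORMAT TSVRaw"
```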
@@ -958,6 +963,7 @@ CREATE TABLE system.role_grants
`user_name` Nullable(String),
`role_name` Nullable(String),
`granted_role_name` String,
+ `granted_role_id` UUID,
`granted_role_is_default` UInt8,
`with_admin_option` UInt8
)
diff --git a/tests/queries/0_stateless/02122_parallel_formatting.lib b/tests/queries/0_stateless/02122_parallel_formatting.lib
index 5175e004cc5..a26e6917c90 100755
--- a/tests/queries/0_stateless/02122_parallel_formatting.lib
+++ b/tests/queries/0_stateless/02122_parallel_formatting.lib
@@ -23,3 +23,5 @@ $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(nu
diff $non_parallel_file $parallel_file
rm $non_parallel_file $parallel_file
+
+# vi: ft=bash
diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh
index 5a139e8b01d..474fc8ef199 100755
--- a/tests/queries/0_stateless/02125_many_mutations.sh
+++ b/tests/queries/0_stateless/02125_many_mutations.sh
@@ -43,11 +43,13 @@ job &
wait
+# truncate before drop, avoid removing all the mutations (it's slow) in DatabaseCatalog's thread (may affect other tests)
$CLICKHOUSE_CLIENT --multiquery -q "
select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done;
system start merges many_mutations;
optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1;
select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done;
select x, y from many_mutations order by x;
+truncate table many_mutations;
drop table many_mutations;
"
diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh
index 819ac8c9524..b7585991738 100755
--- a/tests/queries/0_stateless/02125_many_mutations_2.sh
+++ b/tests/queries/0_stateless/02125_many_mutations_2.sh
@@ -47,6 +47,7 @@ job &
wait
+# truncate before drop, avoid removing all the mutations (it's slow) in DatabaseCatalog's thread (may affect other tests)
$CLICKHOUSE_CLIENT --multiquery -q "
select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done;
system start merges many_mutations;
@@ -55,5 +56,6 @@ system flush logs;
select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done;
select count() from many_mutations;
select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9;
+truncate table many_mutations;
drop table many_mutations;
"
diff --git a/tests/queries/0_stateless/02136_scalar_progress.sh b/tests/queries/0_stateless/02136_scalar_progress.sh
index 9f4429b0caa..f2016b04f82 100755
--- a/tests/queries/0_stateless/02136_scalar_progress.sh
+++ b/tests/queries/0_stateless/02136_scalar_progress.sh
@@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
.
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000) settings max_block_size=65505);" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" +$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000) settings max_block_size=65505);" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" | sed 's/,\"elapsed_ns[^}]*//' diff --git a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql index 180610288aa..17ff367a58d 100644 --- a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql +++ b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql @@ -6,7 +6,7 @@ SELECT '#02136_scalar_subquery_4', (SELECT max(number) FROM numbers(1000)) as n SYSTEM FLUSH LOGS; SELECT read_rows, query FROM system.query_log WHERE - event_date > yesterday() + event_date >= yesterday() AND type = 'QueryFinish' AND current_database == currentDatabase() AND query LIKE 'SELECT ''#02136_scalar_subquery_%' diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference index e4c93e9e1c5..4c1d5dc829f 100644 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -1 +1,2 @@ CREATE TABLE _local.table\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TABLE foo.table\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh index fc71f779fa1..934d87616ac 100755 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -5,3 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' +$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference index d1662cdeb81..f2a4ef1f634 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference @@ -14,7 +14,7 @@ ExpressionTransform (Expression) ExpressionTransform × 2 (ReadFromMergeTree) - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 2020-10-01 9 2020-10-01 9 2020-10-01 9 @@ -32,9 +32,9 @@ ExpressionTransform ExpressionTransform × 2 (ReadFromMergeTree) ReverseTransform - MergeTreeReverse 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 ReverseTransform - MergeTreeReverse 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 2020-10-01 9 2020-10-01 9 2020-10-01 9 @@ -51,7 +51,7 @@ ExpressionTransform (Expression) ExpressionTransform × 2 (ReadFromMergeTree) - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 2020-10-11 0 2020-10-11 0 2020-10-11 0 @@ -65,7 +65,7 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform (Limit) @@ -77,7 +77,7 @@ ExpressionTransform ExpressionTransform (ReadFromMergeTree) ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 2020-10-11 0 0 2020-10-11 0 10 2020-10-11 0 20 @@ -93,7 +93,7 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform (Limit) @@ -107,7 +107,7 @@ ExpressionTransform ExpressionTransform (ReadFromMergeTree) ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 2020-10-12 0 2020-10-12 1 2020-10-12 2 @@ -129,7 +129,7 @@ ExpressionTransform ExpressionTransform (ReadFromMergeTree) ReverseTransform - MergeTreeReverse 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 (Expression) ExpressionTransform (Limit) @@ -142,7 +142,7 @@ ExpressionTransform (ReadFromMergeTree) ExpressionTransform ReverseTransform - MergeTreeReverse 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 2020-10-12 99999 2020-10-12 99998 2020-10-12 99997 @@ -169,7 +169,7 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 2020-10-10 00:00:00 0.01 2020-10-10 00:00:00 0.01 2020-10-10 00:00:00 0.01 @@ -183,7 +183,7 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 2020-10-10 00:00:00 0.01 2020-10-10 00:00:00 0.01 2020-10-10 00:00:00 0.01 diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql 
b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql index 5e662bd7842..0834b76d4ec 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql @@ -5,7 +5,7 @@ SET read_in_order_two_level_merge_threshold=100; DROP TABLE IF EXISTS t_read_in_order; CREATE TABLE t_read_in_order(date Date, i UInt64, v UInt64) -ENGINE = MergeTree ORDER BY (date, i); +ENGINE = MergeTree ORDER BY (date, i) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_read_in_order SELECT '2020-10-10', number % 10, number FROM numbers(100000); INSERT INTO t_read_in_order SELECT '2020-10-11', number % 10, number FROM numbers(100000); @@ -55,7 +55,7 @@ SELECT a, b FROM t_read_in_order WHERE a = 1 ORDER BY b DESC SETTINGS read_in_or DROP TABLE t_read_in_order; CREATE TABLE t_read_in_order(dt DateTime, d Decimal64(5), v UInt64) -ENGINE = MergeTree ORDER BY (toStartOfDay(dt), d); +ENGINE = MergeTree ORDER BY (toStartOfDay(dt), d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_read_in_order SELECT toDateTime('2020-10-10 00:00:00') + number, 1 / (number % 100 + 1), number FROM numbers(1000); diff --git a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh index da2dcd055ea..114f60cc393 100755 --- a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh +++ b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_index_hypothesis" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" $CLICKHOUSE_CLIENT -q "INSERT INTO t_index_hypothesis SELECT number, number + 1 FROM numbers(10000000)" diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh index fd6e44577d9..bf79e5f769d 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh @@ -17,9 +17,9 @@ prepare_table() { table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;" if [ -z "$1" ]; then - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" else - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" fi $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;" for ((i = 1; i <= max_threads; i++)); do diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh index 703b2c4357c..77b9b2942c5 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh +++ 
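Most of the `CREATE TABLE` changes in this and the following hunks pin `index_granularity = 8192, index_granularity_bytes = '10Mi'` explicitly. With the granularity fixed, randomized MergeTree settings in CI cannot change how many marks a query touches, which keeps `EXPLAIN PIPELINE` output such as `MergeTreeSelect(...)` stable. A minimal sketch of the pattern; the table name `t_example` is illustrative only, and `$CLICKHOUSE_CLIENT` comes from `shell_config.sh` as in the other `.sh` tests:

```bash
# Pin the default granularity so CI setting randomization cannot change
# how many granules/marks the queries under test read.
$CLICKHOUSE_CLIENT -q "
    CREATE TABLE t_example (key UInt64, value String)
    ENGINE = MergeTree ORDER BY key
    SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'"
```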
b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, distributed, no-tsan +# Tags: long, distributed, no-tsan, no-debug # These tests don't use `current_database = currentDatabase()` condition, because database name isn't propagated during remote queries. @@ -19,9 +19,9 @@ prepare_table() { table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;" if [ -z "$1" ]; then - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" else - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" fi $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;" for ((i = 1; i <= max_threads; i++)); do diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql index 83d8d23264e..c2b97231145 100644 --- a/tests/queries/0_stateless/02151_lc_prefetch.sql +++ b/tests/queries/0_stateless/02151_lc_prefetch.sql @@ -1,6 +1,6 @@ -- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug drop table if exists tab_lc; -CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x; +CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into tab_lc select number, toString(number % 10) from numbers(20000000); optimize table tab_lc; select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1; diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh index 4132ac91ae4..5f9eb460e44 100755 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-cpu-aarch64 +# Tags: no-tsan, no-cpu-aarch64, no-parallel # TSan does not supports tracing. 
# trace_log doesn't work on aarch64 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql index 50a10834e64..86cfff21446 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache.sql +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.sql @@ -6,7 +6,7 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_ON', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_GLOBAL_ON', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 1; @@ -16,17 +16,17 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_OFF', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_GLOBAL_OFF', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 0; WITH - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_NEW_ANALYZER', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_NEW_ANALYZER', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02177_issue_31009.sql b/tests/queries/0_stateless/02177_issue_31009.sql index 280627954d9..f25df59f4b4 100644 --- a/tests/queries/0_stateless/02177_issue_31009.sql +++ b/tests/queries/0_stateless/02177_issue_31009.sql @@ -5,8 +5,8 @@ SET max_threads=0; DROP TABLE IF EXISTS left; DROP TABLE IF EXISTS right; -CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key; -CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO left SELECT number, toString(number) FROM numbers(25367182); INSERT INTO right SELECT number, toString(number) FROM numbers(23124707); diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference index 821fddbb933..0cf7913e28e 100644 --- a/tests/queries/0_stateless/02206_information_schema_show_database.reference +++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference @@ -1,4 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n 
`numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `TABLE_CATALOG` String ALIAS table_catalog,\n `COLUMN_NAME` String ALIAS column_name,\n `ORDINAL_POSITION` UInt64 ALIAS ordinal_position,\n `COLUMN_DEFAULT` String ALIAS column_default,\n `IS_NULLABLE` String ALIAS is_nullable,\n `DATA_TYPE` String ALIAS data_type,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64) ALIAS character_maximum_length,\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64) ALIAS character_octet_length,\n `NUMERIC_PRECISION` Nullable(UInt64) ALIAS numeric_precision,\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64) ALIAS numeric_precision_radix,\n `NUMERIC_SCALE` Nullable(UInt64) ALIAS numeric_scale,\n `DATETIME_PRECISION` Nullable(UInt64) ALIAS datetime_precision,\n `CHARACTER_SET_CATALOG` Nullable(String) ALIAS character_set_catalog,\n `CHARACTER_SET_SCHEMA` Nullable(String) ALIAS character_set_schema,\n `CHARACTER_SET_NAME` Nullable(String) ALIAS character_set_name,\n `COLLATION_CATALOG` Nullable(String) ALIAS collation_catalog,\n `COLLATION_SCHEMA` Nullable(String) ALIAS collation_schema,\n `COLLATION_NAME` Nullable(String) ALIAS collation_name,\n `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog,\n `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema,\n `DOMAIN_NAME` Nullable(String) ALIAS domain_name,\n `COLUMN_COMMENT` String ALIAS column_comment,\n `COLUMN_TYPE` String ALIAS column_type\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n database AS TABLE_SCHEMA,\n table AS table_name,\n table AS TABLE_NAME,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n type AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n NULL AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n NULL AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n type AS column_type\nFROM system.columns CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), 
`TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 41543d0706a..f480236111f 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -38,4 +38,5 @@ LazyOutputFormat 1 1 1 0 0 LimitsCheckingTransform 1 1 1 1 1 NullSource 1 0 0 0 0 NullSource 1 0 0 0 0 +NullSource 0 0 0 0 0 SourceFromSingleChunk 1 0 0 1 1 diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 26646bd91a0..9cb4fb939e7 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-upgrade-check +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02231_bloom_filter_sizing.reference b/tests/queries/0_stateless/02231_bloom_filter_sizing.reference new file mode 100644 index 00000000000..bdba311c092 --- /dev/null +++ b/tests/queries/0_stateless/02231_bloom_filter_sizing.reference @@ -0,0 +1,6 @@ +Bloom filter on sort key +10000 +0 +Bloom filter on non-sort key +10000 +0 diff --git a/tests/queries/0_stateless/02231_bloom_filter_sizing.sql b/tests/queries/0_stateless/02231_bloom_filter_sizing.sql new file mode 100644 index 00000000000..233e3111067 --- /dev/null +++ 
b/tests/queries/0_stateless/02231_bloom_filter_sizing.sql @@ -0,0 +1,53 @@ +SELECT 'Bloom filter on sort key'; +DROP TABLE IF EXISTS bloom_filter_sizing_pk; +CREATE TABLE bloom_filter_sizing_pk( + key UInt64, + value UInt64, + + -- Very high granularity to have one filter per part. + INDEX key_bf key TYPE bloom_filter(0.01) GRANULARITY 2147483648 +) ENGINE=MergeTree ORDER BY key; + +INSERT INTO bloom_filter_sizing_pk +SELECT +number % 100 as key, -- 100 unique keys +number as value -- whatever +FROM numbers(1000 * 1000); + +-- +-- Merge everything into a single part +-- +OPTIMIZE TABLE bloom_filter_sizing_pk FINAL; + +SELECT COUNT() from bloom_filter_sizing_pk WHERE key = 1; + +-- Check bloom filter size. According to https://hur.st/bloomfilter/?n=100&p=0.01 for 100 keys it should be less that 200B +SELECT COUNT() from system.parts where database = currentDatabase() AND table = 'bloom_filter_sizing_pk' and secondary_indices_uncompressed_bytes > 200 and active; + +SELECT 'Bloom filter on non-sort key'; +DROP TABLE IF EXISTS bloom_filter_sizing_sec; +CREATE TABLE bloom_filter_sizing_sec( + key1 UInt64, + key2 UInt64, + value UInt64, + + -- Very high granularity to have one filter per part. + INDEX key_bf key2 TYPE bloom_filter(0.01) GRANULARITY 2147483648 +) ENGINE=MergeTree ORDER BY key1; + +INSERT INTO bloom_filter_sizing_sec +SELECT +number % 100 as key1, -- 100 unique keys +rand() % 100 as key2, -- 100 unique keys +number as value -- whatever +FROM numbers(1000 * 1000); + +-- +-- Merge everything into a single part +-- +OPTIMIZE TABLE bloom_filter_sizing_sec FINAL; + +SELECT COUNT() from bloom_filter_sizing_sec WHERE key1 = 1; + +-- Check bloom filter size. According to https://hur.st/bloomfilter/?n=100&p=0.01 for 100 keys it should be less that 200B +SELECT COUNT() from system.parts where database = currentDatabase() AND table = 'bloom_filter_sizing_sec' and secondary_indices_uncompressed_bytes > 200 and active; diff --git a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.reference b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.reference index f98effbec67..d6c1c5d3fc4 100644 --- a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.reference +++ b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.reference @@ -19,7 +19,7 @@ ExpressionTransform × 2 (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=0, read_in_order_two_level_merge_threshold=1; (Expression) ExpressionTransform × 2 @@ -36,7 +36,7 @@ ExpressionTransform × 2 (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1; 0 0 4 0 1 3 diff --git a/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql b/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql index 71321b4dfe4..3b474369c98 100644 --- a/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql +++ b/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql @@ -1,8 +1,8 @@ DROP 
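The new `02231_bloom_filter_sizing` test above uses `GRANULARITY 2147483648` so each part ends up with a single bloom filter, then checks through `system.parts` that the filter built for roughly 100 distinct keys stays small (the 200-byte bound comes from the comment in the test). A hedged sketch of that size check, reusing the `bloom_filter_sizing_pk` table created above:

```bash
# Count active parts whose secondary indexes are unexpectedly large;
# the expected output is 0 when the filter is sized for ~100 distinct keys.
$CLICKHOUSE_CLIENT -q "
    SELECT count()
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'bloom_filter_sizing_pk'
      AND active
      AND secondary_indices_uncompressed_bytes > 200"
```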
TABLE IF EXISTS ev; DROP TABLE IF EXISTS idx; -CREATE TABLE ev (a Int32, b Int32) Engine=MergeTree() ORDER BY a; -CREATE TABLE idx (a Int32) Engine=MergeTree() ORDER BY a; +CREATE TABLE ev (a Int32, b Int32) Engine=MergeTree() ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE idx (a Int32) Engine=MergeTree() ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO ev SELECT number, number FROM numbers(10000000); INSERT INTO idx SELECT number * 5 FROM numbers(1000); diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index 1de6447172d..dc8fceddc52 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -12,7 +12,8 @@ CREATE TABLE t_1 ) ENGINE = MergeTree PARTITION BY toYYYYMM(p_time) -ORDER BY order_0; +ORDER BY order_0 +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE t_random_1 ( diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference index eb6c9305ebd..6b79d7bfcab 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -12,9 +12,9 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', - enable_bypass_cache_with_threashold = 1, - bypass_cache_threashold = 100, + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + enable_bypass_cache_with_threshold = 1, + bypass_cache_threshold = 100, delayed_cleanup_interval_ms = 100, disk = 's3_disk'); INSERT INTO test SELECT number, toString(number) FROM numbers(100); diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index 8871f8655dd..094ef5414ba 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -15,9 +15,9 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', - enable_bypass_cache_with_threashold = 1, - bypass_cache_threashold = 100, + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + enable_bypass_cache_with_threshold = 1, + bypass_cache_threshold = 100, delayed_cleanup_interval_ms = 100, disk = 's3_disk'); diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 26340c271e1..b06ab436dd8 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -14,7 +14,7 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', cache_on_write_operations= 1, enable_filesystem_query_cache_limit = 1, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql 
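The cache-disk hunks above switch the cache `path` to a relative one (instead of a hard-coded location under `/var/lib/clickhouse`) and correct the `threashold` spelling to `threshold` in the bypass settings. A hedged sketch of the corrected dynamic-disk clause; the table name and columns are illustrative, and `s3_disk` plus `$CLICKHOUSE_TEST_UNIQUE_NAME` are assumed to be available as in these tests:

```bash
# Sketch of the corrected cache-disk definition used by the tests above.
$CLICKHOUSE_CLIENT -q "
    CREATE TABLE test_cache_bypass (key UInt32, value String)
    ENGINE = MergeTree ORDER BY key
    SETTINGS min_bytes_for_wide_part = 10485760, disk = disk(
        type = cache,
        max_size = '128Mi',
        path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
        enable_bypass_cache_with_threshold = 1,
        bypass_cache_threshold = 100,
        delayed_cleanup_interval_ms = 100,
        disk = 's3_disk')"
```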
b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index d85b3f543e1..b1a64cb47cf 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -17,7 +17,7 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', cache_on_write_operations= 1, enable_filesystem_query_cache_limit = 1, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index b3b7d12d219..c6099b96d31 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -24,8 +24,8 @@ state: DOWNLOADED 8 2014 8 2014 24 84045 -35 168815 -44 252113 +32 167243 +41 250541 5010500 18816 Using storage policy: local_cache @@ -54,7 +54,7 @@ state: DOWNLOADED 8 2014 8 2014 24 84045 -35 168815 -44 252113 +32 167243 +41 250541 5010500 18816 diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh index 8f387333c33..2af54276469 100755 --- a/tests/queries/0_stateless/02242_delete_user_race.sh +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-upgrade-check +# Tags: race, no-fasttest, no-parallel # Test tries to reproduce a race between threads: # - deletes user diff --git a/tests/queries/0_stateless/02243_drop_user_grant_race.sh b/tests/queries/0_stateless/02243_drop_user_grant_race.sh index 46ad776006e..4dce8e8124c 100755 --- a/tests/queries/0_stateless/02243_drop_user_grant_race.sh +++ b/tests/queries/0_stateless/02243_drop_user_grant_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-upgrade-check +# Tags: race, no-fasttest, no-parallel set -e diff --git a/tests/queries/0_stateless/02244_make_datetime.sql b/tests/queries/0_stateless/02244_make_datetime.sql index 9b8f561994b..a3d88d89682 100644 --- a/tests/queries/0_stateless/02244_make_datetime.sql +++ b/tests/queries/0_stateless/02244_make_datetime.sql @@ -20,7 +20,7 @@ select makeDateTime(1984, 1, 41, 0, 0, 0, 'UTC'); select makeDateTime(1984, 1, 1, 25, 0, 0, 'UTC'); select makeDateTime(1984, 1, 1, 0, 70, 0, 'UTC'); select makeDateTime(1984, 1, 1, 0, 0, 70, 'UTC'); -select makeDateTime(1984, 1, 1, 0, 0, 0, 'not a timezone'); -- { serverError 1000 } +select makeDateTime(1984, 1, 1, 0, 0, 0, 'not a timezone'); -- { serverError BAD_ARGUMENTS } select makeDateTime(1984, 1, 1, 0, 0, 0, 'UTC'); select makeDateTime(1983, 2, 29, 0, 0, 0, 'UTC'); diff --git a/tests/queries/0_stateless/02245_make_datetime64.sql b/tests/queries/0_stateless/02245_make_datetime64.sql index 054cbd8956d..a7b3a3d23c5 100644 --- a/tests/queries/0_stateless/02245_make_datetime64.sql +++ b/tests/queries/0_stateless/02245_make_datetime64.sql @@ -38,7 +38,7 @@ select makeDateTime64(1984, 1, 41, 0, 0, 0, 0, 9, 'UTC'); select makeDateTime64(1984, 1, 1, 25, 0, 0, 0, 9, 'UTC'); select makeDateTime64(1984, 1, 1, 0, 70, 0, 0, 9, 'UTC'); select makeDateTime64(1984, 1, 1, 0, 0, 70, 0, 9, 'UTC'); -select makeDateTime64(1984, 1, 1, 0, 0, 0, 0, 9, 'not a timezone'); -- { serverError 1000 } +select makeDateTime64(1984, 1, 1, 0, 0, 0, 0, 9, 'not a 
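The `serverError 1000` → `serverError BAD_ARGUMENTS` changes above move the test expectations from numeric error codes to symbolic names, which survive renumbering. The symbolic name also appears in the server's error message (as the `(ACCESS_DENIED)` grep elsewhere in this patch relies on), so a shell test can approximate the same check; a minimal sketch:

```bash
# Expect the symbolic error name rather than a numeric code in the message.
$CLICKHOUSE_CLIENT -q "SELECT makeDateTime(1984, 1, 1, 0, 0, 0, 'not a timezone')" 2>&1 \
    | grep -c "BAD_ARGUMENTS"
```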
timezone'); -- { serverError BAD_ARGUMENTS } select makeDateTime64(1984, 1, 1, 2, 3, 4, 5, 9, 'UTC'); select makeDateTime64(1984, 2, 29, 2, 3, 4, 5, 9, 'UTC'); diff --git a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference index d43b658d5d4..f78c46e54f4 100644 --- a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference +++ b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference @@ -1,3 +1,3 @@ with_on_cluster_02250_ON_CLUSTER_grant_default without_on_cluster_02250_ON_CLUSTER_grant_default -Not enough privileges. To execute this query it's necessary to have grant CLUSTER ON *.*. (ACCESS_DENIED) +Not enough privileges. To execute this query, it's necessary to have the grant CLUSTER ON *.*. (ACCESS_DENIED) diff --git a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh index 6129d0d39c6..66417e9694a 100755 --- a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh +++ b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh @@ -27,5 +27,5 @@ echo "with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" $CLICKHOUSE_CLIENT --user "with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" -q "CREATE DATABASE IF NOT EXISTS db_with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME ON CLUSTER test_shard_localhost" >/dev/null echo "without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" $CLICKHOUSE_CLIENT --user "without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" -q "CREATE DATABASE IF NOT EXISTS db_without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME ON CLUSTER test_shard_localhost" |& { - grep -m1 -F -o "Not enough privileges. To execute this query it's necessary to have grant CLUSTER ON *.*. (ACCESS_DENIED)" + grep -m1 -F -o "Not enough privileges. To execute this query, it's necessary to have the grant CLUSTER ON *.*. (ACCESS_DENIED)" } diff --git a/tests/queries/0_stateless/02253_empty_part_checksums.reference b/tests/queries/0_stateless/02253_empty_part_checksums.reference new file mode 100644 index 00000000000..65a8c9ee65e --- /dev/null +++ b/tests/queries/0_stateless/02253_empty_part_checksums.reference @@ -0,0 +1,8 @@ +1 +1000 +0 +0 +0 +1 +0 +0_0_0_0 Wide 370db59d5dcaef5d762b11d319c368c7 514a8be2dac94fd039dbd230065e58a4 b324ada5cd6bb14402c1e59200bd003a diff --git a/tests/queries/0_stateless/02253_empty_part_checksums.sh b/tests/queries/0_stateless/02253_empty_part_checksums.sh new file mode 100755 index 00000000000..5d4e750f46d --- /dev/null +++ b/tests/queries/0_stateless/02253_empty_part_checksums.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-replicated-database +# no-replicated-database because it adds extra replicas + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt sync;" +$CLICKHOUSE_CLIENT -q "CREATE TABLE rmt (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) +ENGINE = ReplicatedMergeTree('/test/02253/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY a PARTITION BY b % 10 +SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, +cleanup_thread_preferred_points_per_iteration=0, min_bytes_for_wide_part=0, remove_empty_parts=0" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO rmt SELECT rand(1), 0, 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? 
NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(1000);" + +$CLICKHOUSE_CLIENT -q "check table rmt" +$CLICKHOUSE_CLIENT -q "select count() from rmt" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt' and name='0_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -rf "$path" + +# detach the broken part, replace it with empty one +$CLICKHOUSE_CLIENT -q "check table rmt" 2>/dev/null +$CLICKHOUSE_CLIENT -q "select count() from rmt" + +$CLICKHOUSE_CLIENT --receive_timeout=60 -q "system sync replica rmt" + +# the empty part should pass the check +$CLICKHOUSE_CLIENT -q "check table rmt" +$CLICKHOUSE_CLIENT -q "select count() from rmt" + +$CLICKHOUSE_CLIENT -q "select name, part_type, hash_of_all_files, hash_of_uncompressed_files, uncompressed_hash_of_compressed_files from system.parts where database=currentDatabase()" + +$CLICKHOUSE_CLIENT -q "drop table rmt sync;" diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index 721e7960875..e2d1ec3980e 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -1,6 +1,6 @@ -insert into foo settings max_threads=1 +[multi] insert into foo settings max_threads=1 Syntax error (query): failed at position 40 (end of query): -insert into foo format tsv settings max_threads=1 +[multi] insert into foo format tsv settings max_threads=1 Can't format ASTInsertQuery with data, since data will be lost. [multi] insert into foo format tsv settings max_threads=1 INSERT INTO foo @@ -8,7 +8,7 @@ SETTINGS max_threads = 1 FORMAT tsv [oneline] insert into foo format tsv settings max_threads=1 INSERT INTO foo SETTINGS max_threads = 1 FORMAT tsv -insert into foo settings max_threads=1 format tsv settings max_threads=1 +[multi] insert into foo settings max_threads=1 format tsv settings max_threads=1 You have SETTINGS before and after FORMAT Cannot parse input: expected '\n' before: 'settings max_threads=1 1' 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 3d5f780a38c..8b156ffec83 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -8,7 +8,7 @@ function run_format() { local q="$1" && shift - echo "$q" + echo "[multi] $q" $CLICKHOUSE_FORMAT "$@" <<<"$q" } function run_format_both() @@ -22,18 +22,20 @@ function run_format_both() } # NOTE: that those queries may work slow, due to stack trace obtaining -run_format 'insert into foo settings max_threads=1' 2> >(grep -m1 -o "Syntax error (query): failed at position .* (end of query):") +run_format 'insert into foo settings max_threads=1' |& grep --max-count 2 --only-matching -e "Syntax error (query): failed at position .* (end of query):" -e '^\[.*$' + # compatibility -run_format 'insert into foo format tsv settings max_threads=1' 2> >(grep -m1 -F -o "Can't format ASTInsertQuery with data, since data will be lost.") +run_format 'insert into foo format tsv settings max_threads=1' |& grep --max-count 2 --only-matching -e "Can't format ASTInsertQuery with data, since data will be lost." 
-e '^\[.*$' run_format_both 'insert into foo format tsv settings max_threads=1' --allow_settings_after_format_in_insert -run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert |& grep --max-count 2 --only-matching -e "You have SETTINGS before and after FORMAT" -e '^\[.*$' + # and via server (since this is a separate code path) $CLICKHOUSE_CLIENT -q 'drop table if exists data_02263' $CLICKHOUSE_CLIENT -q 'create table data_02263 (key Int) engine=Memory()' -$CLICKHOUSE_CLIENT -q 'insert into data_02263 format TSV settings max_threads=1 1' 2> >(grep -m1 -F -o "Cannot parse input: expected '\n' before: 'settings max_threads=1 1'") +$CLICKHOUSE_CLIENT -q 'insert into data_02263 format TSV settings max_threads=1 1' |& grep --max-count 1 -F --only-matching "Cannot parse input: expected '\n' before: 'settings max_threads=1 1'" $CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 format TSV settings max_threads=1 1' $CLICKHOUSE_CLIENT -q 'select * from data_02263' -$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 settings max_threads=1 format tsv settings max_threads=1' 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 settings max_threads=1 format tsv settings max_threads=1' |& grep --max-count 1 -F --only-matching "You have SETTINGS before and after FORMAT" $CLICKHOUSE_CLIENT -q 'drop table data_02263' run_format_both 'insert into foo values' diff --git a/tests/queries/0_stateless/02270_client_name.reference b/tests/queries/0_stateless/02270_client_name.reference index fbb2921010e..8d1f2863fad 100644 --- a/tests/queries/0_stateless/02270_client_name.reference +++ b/tests/queries/0_stateless/02270_client_name.reference @@ -1 +1 @@ -"ClickHouse" +"ClickHouse client" diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 index 6b6aa53836e..2a13e11e116 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 @@ -1,6 +1,4 @@ --- Tags: long, no-upgrade-check - --- TODO(@vdimir): remove no-upgrade-check tag after https://github.com/ClickHouse/ClickHouse/pull/51737 is released +-- Tags: long, no-random-settings DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 index 621352f9c25..53fab9d6271 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 +++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 @@ -2,8 +2,8 @@ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; -CREATE TABLE t1 (key UInt32, s String) ENGINE = MergeTree ORDER BY key; -CREATE TABLE t2 (key UInt32, s String) ENGINE = MergeTree ORDER BY key; +CREATE TABLE t1 (key UInt32, s String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE t2 (key UInt32, s String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; {% set ltable_size = 10000000 -%} {% set rtable_size = 1000000 -%} diff --git 
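The `02263_format_insert_settings.sh` changes above replace `2> >(grep …)` process substitutions with `cmd |& grep --max-count … --only-matching …`, which merges stderr into the pipeline directly instead of relying on an asynchronous process substitution. A minimal sketch of the idiom, with `some_command` standing in for the real invocation:

```bash
# |& pipes both stdout and stderr; grep keeps only the first expected message.
some_command() {
    echo "noise"
    echo "Can't format ASTInsertQuery with data, since data will be lost." >&2
}

some_command |& grep --max-count 1 --only-matching -F \
    "Can't format ASTInsertQuery with data, since data will be lost."
```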
a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql index 8e0fb4a55a0..7f7285d5472 100644 --- a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET max_bytes_in_join = '100', join_algorithm = 'auto'; diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql index f73525f92be..96dd8f6176b 100644 --- a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -1,2 +1 @@ --- Tags: no-upgrade-check SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) diff --git a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql index 08e8843f763..45390c0e8ef 100644 --- a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql +++ b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- https://github.com/ClickHouse/ClickHouse/issues/37561 diff --git a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference index aa47d0d46d4..44e0be8e356 100644 --- a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference +++ b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference @@ -1,2 +1,4 @@ 0 0 +0 +0 diff --git a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql index 6a5e4a0ae65..f0085b7660f 100644 --- a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql +++ b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql @@ -4,3 +4,7 @@ SET optimize_monotonous_functions_in_order_by = 1; SELECT * FROM cluster(test_cluster_two_shards_localhost, system, one) ORDER BY toDateTime(dummy); + +SELECT * +FROM cluster(test_cluster_two_shards_localhost) +ORDER BY toDateTime(dummy) diff --git a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql index df20e5c42d4..cbf71f1d555 100644 --- a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql +++ b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET join_algorithm = 'partial_merge'; diff --git a/tests/queries/0_stateless/02316_const_string_intersact.sql b/tests/queries/0_stateless/02316_const_string_intersact.sql index 148d048952b..33629d2a56a 100644 --- a/tests/queries/0_stateless/02316_const_string_intersact.sql +++ b/tests/queries/0_stateless/02316_const_string_intersact.sql @@ -1,3 +1,2 @@ --- Tags: no-upgrade-check SELECT 'Play ClickHouse' InterSect SELECT 'Play ClickHouse' diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference index 0a123a2a50f..da07e94cead 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference @@ -38,19 
+38,20 @@ DistinctTransform DistinctTransform -- Check reading in order for distinct -- disabled, distinct columns match sorting key -MergeTreeThread +algorithm: Thread -- enabled, distinct columns match sorting key -MergeTreeInOrder +algorithm: InOrder -- enabled, distinct columns form prefix of sorting key -MergeTreeInOrder +algorithm: InOrder -- enabled, distinct columns DON't form prefix of sorting key -MergeTreeThread +algorithm: Thread -- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key -MergeTreeInOrder +algorithm: InOrder -- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key -MergeTreeInOrder +algorithm: InOrder -- enabled, only part of distinct columns form prefix of sorting key -MergeTreeThread +algorithm: Thread +=== disable new analyzer === -- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct Sorting (Stream): a ASC, b ASC Sorting (Stream): a ASC, b ASC @@ -80,3 +81,38 @@ Sorting (Stream): a DESC, b DESC Sorting (Stream): a ASC, b ASC Sorting (Stream): a ASC, b ASC Sorting (Stream): a ASC, b ASC +=== enable new analyzer === +-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct +Sorting (Stream): a_1 ASC, b_0 ASC +Sorting (Stream): a_1 ASC, b_0 ASC +Sorting (Stream): a_1 ASC, b_0 ASC +Sorting (Stream): a_1 ASC, b ASC +-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns +Sorting (Stream): a_1 ASC +Sorting (Stream): a_1 ASC +Sorting (Stream): a_1 ASC +Sorting (Stream): a ASC +-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause +Sorting (Stream): a_1 ASC, b_0 ASC +Sorting (Stream): a_1 ASC, b_0 ASC +Sorting (Stream): a_1 ASC, b_0 ASC +Sorting (Stream): a ASC, b ASC +-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause +Sorting (Stream): a_1 DESC, b_0 DESC +Sorting (Stream): a_1 DESC, b_0 DESC +Sorting (Stream): a_1 DESC, b_0 DESC +Sorting (Stream): a DESC, b DESC +-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause +Sorting (Stream): a_0 ASC, b_1 ASC +Sorting (Stream): a_0 ASC, b_1 ASC +Sorting (Stream): a_0 ASC, b_1 ASC +Sorting (Stream): a ASC, b ASC +-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause +Sorting (Stream): a_1 DESC, b_0 DESC +Sorting (Stream): a_1 DESC, b_0 DESC +Sorting (Stream): a_1 DESC, b_0 DESC +Sorting (Stream): a DESC, b DESC +-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization +Sorting (Stream): a_0 ASC, b_1 ASC +Sorting (Stream): a_0 ASC, b_1 ASC +Sorting (Stream): a ASC, b ASC diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh index 46919ae49b2..07c7bc4af56 100755 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh @@ -12,8 +12,8 @@ GREP_DISTINCT="grep 'DistinctSortedChunkTransform\|DistinctSortedStreamTransform TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'" 
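The hunk above rewrites the helper greps of `02317_distinct_in_order_optimization_explain.sh`: instead of matching the old processor names (`MergeTreeInOrder`, `MergeTreeThread`), they extract the `algorithm:` field from the new `MergeTreeSelect(pool: …, algorithm: …)` naming shown in the updated reference files. A minimal sketch of the extraction; `t` is a placeholder MergeTree table:

```bash
# Helper greps matching the new EXPLAIN PIPELINE processor naming.
FIND_READING_IN_ORDER="grep -o 'algorithm: InOrder'"
FIND_READING_DEFAULT="grep -o 'algorithm: Thread'"

# Illustrative use, in the same style as the test script:
$CLICKHOUSE_CLIENT -q "explain pipeline select distinct a from t" | eval $FIND_READING_DEFAULT
```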
REMOVE_NON_LETTERS="sed 's/[^a-zA-Z]//g'" FIND_DISTINCT="$GREP_DISTINCT | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS" -FIND_READING_IN_ORDER="grep 'MergeTreeInOrder' | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS" -FIND_READING_DEFAULT="grep 'MergeTreeThread' | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS" +FIND_READING_IN_ORDER="grep -o 'algorithm: InOrder' | $TRIM_LEADING_SPACES" +FIND_READING_DEFAULT="grep -o 'algorithm: Thread' | $TRIM_LEADING_SPACES" FIND_SORTING_PROPERTIES="grep 'Sorting (Stream)' | $TRIM_LEADING_SPACES" $CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync" @@ -76,23 +76,46 @@ $CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTI echo "-- enabled, only part of distinct columns form prefix of sorting key" $CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +echo "=== disable new analyzer ===" +DISABLE_ANALYZER="set allow_experimental_analyzer=0" + echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES echo "-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query" ENABLE_READ_IN_ORDER="set optimize_read_in_order=1" echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. 
it contains columns from DISTINCT clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES + +echo "=== enable new analyzer ===" +ENABLE_ANALYZER="set allow_experimental_analyzer=1" + +echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0 settings optimize_move_to_prewhere=1" | eval $FIND_SORTING_PROPERTIES + +echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +echo "-- enabled, check that ReadFromMergeTree sorting 
description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES +echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES +echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES + +echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization" +$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES $CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync" diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql index e6bc475b081..27f58dbff5e 100644 --- a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS test_table; CREATE TABLE test_table (`n` UInt64, `s` String) ENGINE = MergeTree -PRIMARY KEY n ORDER BY n; +PRIMARY KEY n ORDER BY n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test_table SELECT number, concat('some string ', CAST(number, 'String')) FROM numbers(1000000); diff --git a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql index 734c597051e..1dd06a26894 100644 --- a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql +++ b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check select arrayMap(x -> toNullable(1), range(number)) from numbers(3); select arrayFilter(x -> toNullable(1), range(number)) from numbers(3); diff --git a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh index 503b94be715..5bbe31c9237 100755 --- a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh +++ b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff 
--git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index 23512359728..bf4622adedc 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -5,7 +5,8 @@ DROP TABLE IF EXISTS t_sparse_s3; CREATE TABLE t_sparse_s3 (id UInt32, cond UInt8, s String) engine = MergeTree ORDER BY id settings ratio_of_defaults_for_sparse_serialization = 0.01, storage_policy = 's3_cache', -min_bytes_for_wide_part = 0, min_compress_block_size = 1; +min_bytes_for_wide_part = 0, min_compress_block_size = 1, +index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_sparse_s3 SELECT 1, number % 2, '' FROM numbers(8192); INSERT INTO t_sparse_s3 SELECT 2, number % 2, '' FROM numbers(24576); diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference index e225ce389cb..ae4fafae829 100644 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference @@ -1,2 +1,2 @@ -data_02340 1_2_2_0 5 -data_02340_rep 1_0_0_0 5 +data_02340 1_2_2_0 6 +data_02340_rep 1_0_0_0 6 diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index ca838fdf4e0..acb0368c3cc 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -100,7 +100,7 @@ ExpressionTransform × 16 (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (ReadFromRemote) explain pipeline SELECT k1, k3, sum(value) v FROM remote('127.0.0.{1,2}', currentDatabase(), proj_agg_02343) GROUP BY k1, k3 SETTINGS distributed_aggregation_memory_efficient = 1; (Expression) @@ -117,7 +117,7 @@ ExpressionTransform × 16 (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (ReadFromRemote) -- { echoOn } @@ -144,6 +144,6 @@ ExpressionTransform × 2 (Expression) ExpressionTransform × 2 (ReadFromMergeTree) - MergeTreeThread × 2 0 → 1 + MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 2 0 → 1 (ReadFromRemote) (ReadFromRemote) diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index b018cf21f91..ee82c8d7c47 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -5,6 +5,8 @@ set max_threads = 16; set prefer_localhost_replica = 1; set optimize_aggregation_in_order = 0; set max_block_size = 65505; +set allow_prefetched_read_pool_for_remote_filesystem = 0; +set allow_prefetched_read_pool_for_local_filesystem = 0; -- { echoOn } diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index da84cdabf79..5db1aa3e7f5 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1 +1 @@ -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 100 2 0 +134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache/ 2 0 diff --git 
a/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql b/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql index d0d9b130b7e..c963199e05c 100644 --- a/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql +++ b/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql @@ -1,7 +1,7 @@ drop table if exists t_distinct_limit; create table t_distinct_limit (d Date, id Int64) -engine = MergeTree partition by toYYYYMM(d) order by d; +engine = MergeTree partition by toYYYYMM(d) order by d SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; set max_threads = 10; diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index 07f705acd84..35ec675b7c0 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 7a0b9ef8911..7de2c7089b8 100755 --- a/tests/queries/0_stateless/02352_rwlock.sh +++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-parallel, no-fasttest, no-debug # Tag no-parallel -- to avoid running it in parallel, this will avoid possible issues due to high pressure # Test that ensures that WRITE lock failure notifies READ. @@ -12,6 +12,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function wait_query_by_id_started() { + # As the query we are waiting for is running simultaneously, let's give it a little time to actually start. The + # queries are supposed to run for multiple seconds, so sleeping 0.5 seconds is not a big deal, especially when + # flushing the logs can take up to 3 to 5 seconds. Basically waiting a bit here we can increase the chance that we + # don't have spend precious time on flushing logs. + sleep 0.5 local query_id=$1 && shift # wait for query to be started while [ "$($CLICKHOUSE_CLIENT "$@" -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do @@ -51,7 +56,7 @@ while :; do insert_query_id="insert-$(random_str 10)" # 20 seconds sleep - $CLICKHOUSE_CLIENT --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & + $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & if ! 
wait_query_by_id_started "$insert_query_id"; then wait continue diff --git a/tests/queries/0_stateless/02354_annoy_index.reference b/tests/queries/0_stateless/02354_annoy_index.reference index 5e01a6e566e..a0ffb1e1f7f 100644 --- a/tests/queries/0_stateless/02354_annoy_index.reference +++ b/tests/queries/0_stateless/02354_annoy_index.reference @@ -94,7 +94,7 @@ Expression ((Projection + Before ORDER BY)) Name: annoy_index Description: annoy GRANULARITY 2 Parts: 0/1 - Granules: 2/4 + Granules: 0/4 ORDER BY type, L2Distance, check that index is used Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) @@ -110,7 +110,7 @@ Expression (Projection) Name: annoy_index Description: annoy GRANULARITY 2 Parts: 1/1 - Granules: 4/4 + Granules: 2/4 --- Test with Array, GRANULARITY = 4, index_granularity = 4 --- WHERE type, L2Distance, check that index is used Expression ((Projection + Before ORDER BY)) @@ -125,7 +125,7 @@ Expression ((Projection + Before ORDER BY)) Name: annoy_index Description: annoy GRANULARITY 4 Parts: 0/1 - Granules: 3/4 + Granules: 0/4 ORDER BY type, L2Distance, check that index is used Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) @@ -141,4 +141,9 @@ Expression (Projection) Name: annoy_index Description: annoy GRANULARITY 4 Parts: 1/1 - Granules: 4/4 + Granules: 1/4 +--- Test correctness of Annoy index with > 1 mark +1 [1,0,0,0] +9000 [9000,0,0,0] +1 (1,0,0,0) +9000 (9000,0,0,0) diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql index fefb51dfcc9..eab7a62c5f0 100644 --- a/tests/queries/0_stateless/02354_annoy_index.sql +++ b/tests/queries/0_stateless/02354_annoy_index.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan SET allow_experimental_annoy_index = 1; SET allow_experimental_analyzer = 0; @@ -249,3 +249,35 @@ DROP TABLE tab; -- (*) Storage and search in Annoy indexes is inherently random. Tests which check for exact row matches would be unstable. Therefore, -- comment them out. + +SELECT '--- Test correctness of Annoy index with > 1 mark'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, [toFloat32(number), 0., 0., 0.] from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [1.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [9000.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +DROP TABLE tab; + +CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32, Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, (toFloat32(number), 0., 0., 0.) 
from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (1.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index c8ec40bb0a7..a5a3da82324 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; -create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple(); +create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t_2354_dist_with_external_aggr select number, toString(number) as s, toFixedString(s, 100) from numbers_mt(5e7); @@ -16,13 +16,13 @@ set max_bytes_before_external_group_by = '2G', max_block_size = 65505; -- whole aggregation state of local aggregation uncompressed is 5.8G --- it is hard to provide an accurate estimation for memory usage, so 4G is just the actual value taken from the logs + delta +-- it is hard to provide an accurate estimation for memory usage, so 5G is just the actual value taken from the logs + delta -- also avoid using localhost, so the queries will go over separate connections -- (otherwise the memory usage for merge will be counted together with the localhost query) select a, b, c, sum(a) as s from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null -settings max_memory_usage = '4Gi'; +settings max_memory_usage = '5Gi'; DROP TABLE t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02354_usearch_index.reference b/tests/queries/0_stateless/02354_usearch_index.reference new file mode 100644 index 00000000000..f966b5ce33f --- /dev/null +++ b/tests/queries/0_stateless/02354_usearch_index.reference @@ -0,0 +1,148 @@ +--- Negative tests --- +--- Test default GRANULARITY (should be 100 mio. 
for usearch)--- +CREATE TABLE default.tab\n(\n `id` Int32,\n `vector` Array(Float32),\n INDEX usearch_index vector TYPE usearch GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `id` Int32,\n `vector` Array(Float32),\n INDEX usearch_index vector TYPE usearch GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 +--- Test with Array, GRANULARITY = 1, index_granularity = 5 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 1/3 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 3/3 +Reference ARRAYs with non-matching dimension are rejected +Special case: MaximumDistance is negative +WHERE type, L2Distance +Special case: setting max_limit_for_ann_queries +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 +--- Test with Tuple, GRANULARITY = 1, index_granularity = 5 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 1/3 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 3/3 +--- Test non-default metric (cosine distance) --- +--- Test with Array, GRANULARITY = 2, index_granularity = 4 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 2 + Parts: 0/1 + Granules: 0/4 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 2 + Parts: 1/1 + Granules: 2/4 +--- Test with Array, GRANULARITY = 4, index_granularity = 4 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + 
Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 4 + Parts: 0/1 + Granules: 0/4 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 4 + Parts: 1/1 + Granules: 1/4 +--- Test correctness of Usearch index with > 1 mark +1 [1,0,0,0] +9000 [9000,0,0,0] +1 (1,0,0,0) +9000 (9000,0,0,0) diff --git a/tests/queries/0_stateless/02354_usearch_index.sql b/tests/queries/0_stateless/02354_usearch_index.sql new file mode 100644 index 00000000000..f771e2835fa --- /dev/null +++ b/tests/queries/0_stateless/02354_usearch_index.sql @@ -0,0 +1,262 @@ +-- Tags: no-fasttest +-- no-fasttest because needs usearch lib + +SET allow_experimental_usearch_index = 1; +SET allow_experimental_analyzer = 0; + +SELECT '--- Negative tests ---'; + +DROP TABLE IF EXISTS tab; + +-- must have at most 1 arguments +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('too', 'many')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } + +-- first argument (distance_function) must be String +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } + +-- must be created on single column +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index (vector, id) TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } + +-- reject unsupported distance functions +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('wormholeDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } + +-- must be created on Array/Tuple(Float32) columns +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE tab(id Int32, vector Float32, INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector Array(Float64), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector Tuple(Float64), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector LowCardinality(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector Nullable(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } + +SELECT '--- Test default GRANULARITY (should be 100 mio. 
for usearch)---'; + +CREATE TABLE tab (id Int32, vector Array(Float32), INDEX usearch_index(vector) TYPE usearch) ENGINE=MergeTree ORDER BY id; +SHOW CREATE TABLE tab; +DROP TABLE tab; + +CREATE TABLE tab (id Int32, vector Array(Float32)) ENGINE=MergeTree ORDER BY id; +ALTER TABLE tab ADD INDEX usearch_index(vector) TYPE usearch; +SHOW CREATE TABLE tab; + +DROP TABLE tab; + + +SELECT '--- Test with Array, GRANULARITY = 1, index_granularity = 5 ---'; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +-- rows = 15, index_granularity = 5, GRANULARITY = 1 gives 3 usearch-indexed blocks (each comprising a single granule) +-- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one usearch-indexed block produces results --> "Granules: 1/3" + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0]) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [0.0, 0.0, 10.0]) +LIMIT 3; + +-- Test special cases. Corresponding special case tests are omitted from later tests. + +SELECT 'Reference ARRAYs with non-matching dimension are rejected'; +SELECT * +FROM tab +ORDER BY L2Distance(vector, [0.0, 0.0]) +LIMIT 3; -- { serverError INCORRECT_QUERY } + +SELECT 'Special case: MaximumDistance is negative'; +SELECT 'WHERE type, L2Distance'; +SELECT * +FROM tab +WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < -1.0 +LIMIT 3; -- { serverError INCORRECT_QUERY } + +SELECT 'Special case: setting max_limit_for_ann_queries'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [5.3, 7.3, 2.1]) +LIMIT 3 +SETTINGS max_limit_for_ann_queries=2; -- doesn't use the ann index + +DROP TABLE tab; + +-- Test Tuple embeddings. Triggers different logic than Array inside MergeTreeIndexUSearch but the same logic as Array above MergeTreeIndexusearch. +-- Therefore test Tuple case just once. 
+ +SELECT '--- Test with Tuple, GRANULARITY = 1, index_granularity = 5 ---'; + +CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5; +INSERT INTO tab VALUES (1, (0.0, 0.0, 10.0)), (2, (0.0, 0.0, 10.5)), (3, (0.0, 0.0, 9.5)), (4, (0.0, 0.0, 9.7)), (5, (0.0, 0.0, 10.2)), (6, (10.0, 0.0, 0.0)), (7, (9.5, 0.0, 0.0)), (8, (9.7, 0.0, 0.0)), (9, (10.2, 0.0, 0.0)), (10, (10.5, 0.0, 0.0)), (11, (0.0, 10.0, 0.0)), (12, (0.0, 9.5, 0.0)), (13, (0.0, 9.7, 0.0)), (14, (0.0, 10.2, 0.0)), (15, (0.0, 10.5, 0.0)); + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, (0.0, 0.0, 10.0)) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, (0.0, 0.0, 10.0)) +LIMIT 3; + +DROP TABLE tab; + +-- Not a systematic test, just to make sure no bad things happen +SELECT '--- Test non-default metric (cosine distance) ---'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('cosineDistance') GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 +-- LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0]) +-- LIMIT 3; + +DROP TABLE tab; + +SELECT '--- Test with Array, GRANULARITY = 2, index_granularity = 4 ---'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]); + +-- rows = 16, index_granularity = 4, GRANULARITY = 2 gives 2 usearch-indexed blocks (each comprising two granules) +-- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one usearch-indexed block produces results --> "Granules: 2/4" + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM 
tab +-- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +LIMIT 3; + +DROP TABLE tab; + +SELECT '--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 4) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]); + +-- rows = 16, index_granularity = 4, GRANULARITY = 4 gives a single usearch-indexed block (comprising all granules) +-- no two matches happen to be located in the same granule, so with LIMIT = 3, we'll get "Granules: 2/4" + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +LIMIT 3; + +DROP TABLE tab; + +SELECT '--- Test correctness of Usearch index with > 1 mark'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, [toFloat32(number), 0., 0., 0.] from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [1.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [9000.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +DROP TABLE tab; + +CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32, Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, (toFloat32(number), 0., 0., 0.) from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (1.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql index b4754c6d6fe..f9f9661a7c4 100644 --- a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql +++ b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql @@ -1,6 +1,7 @@ SET max_block_size = 4213; -SELECT DISTINCT (blockSize() <= 4213) +--- We allocate space for one more row in case nullKeyData is present. 
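The 02355 change that follows loosens the block-size assertion by exactly one row because the aggregator reserves an extra slot for a potential null key (nullKeyData). A rough bash sketch of the same invariant, assuming a running server; the query below is illustrative and not the test's original one:

# Rough sketch: with max_block_size = 4213, blocks produced by aggregation over a Nullable
# key may carry up to 4214 rows (one extra slot for the null key), so this should print 1.
$CLICKHOUSE_CLIENT --max_block_size=4213 -q "
    SELECT DISTINCT blockSize() <= 4214
    FROM (SELECT toNullable(number) AS n FROM numbers(1000000) GROUP BY n)
"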
+SELECT DISTINCT (blockSize() <= 4214) FROM ( SELECT number diff --git a/tests/queries/0_stateless/02363_mapupdate_improve.sql b/tests/queries/0_stateless/02363_mapupdate_improve.sql index b4a4b8e5d91..c3cd8fff9ee 100644 --- a/tests/queries/0_stateless/02363_mapupdate_improve.sql +++ b/tests/queries/0_stateless/02363_mapupdate_improve.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS map_test; CREATE TABLE map_test(`tags` Map(String, String)) ENGINE = MergeTree PRIMARY KEY tags ORDER BY tags SETTINGS index_granularity = 8192; INSERT INTO map_test (tags) VALUES (map('fruit','apple','color','red')); diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql index cf9f2971cb0..41334811464 100644 --- a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql +++ b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS test_table; CREATE TABLE test_table diff --git a/tests/queries/0_stateless/02366_with_fill_date.sql b/tests/queries/0_stateless/02366_with_fill_date.sql index 4d41facf423..aca57b127af 100644 --- a/tests/queries/0_stateless/02366_with_fill_date.sql +++ b/tests/queries/0_stateless/02366_with_fill_date.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SELECT toDate('2022-02-01') AS d1 FROM numbers(18) AS number diff --git a/tests/queries/0_stateless/02373_progress_contain_result.sh b/tests/queries/0_stateless/02373_progress_contain_result.sh index 1b257b699f5..c87a5ec7615 100755 --- a/tests/queries/0_stateless/02373_progress_contain_result.sh +++ b/tests/queries/0_stateless/02373_progress_contain_result.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo 'SELECT 1 FROM numbers(100)' | ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0" --data-binary @- 2>&1 | - grep 'X-ClickHouse-Summary' + grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference index 8653863981c..94bdf1b5ddb 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference @@ -3,7 +3,8 @@ source String format String additional_format_info String registration_time DateTime -schema String +schema Nullable(String) +number_of_rows Nullable(UInt64) x Nullable(Int64) s Nullable(String) x Nullable(Int64) diff --git a/tests/queries/0_stateless/02377_modify_column_from_lc.sql b/tests/queries/0_stateless/02377_modify_column_from_lc.sql index a578e7cb03a..efee323e88d 100644 --- a/tests/queries/0_stateless/02377_modify_column_from_lc.sql +++ b/tests/queries/0_stateless/02377_modify_column_from_lc.sql @@ -9,7 +9,7 @@ CREATE TABLE t_modify_from_lc_1 a LowCardinality(UInt32) CODEC(NONE) ) ENGINE = MergeTree ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE t_modify_from_lc_2 ( @@ -17,7 +17,7 @@ CREATE TABLE t_modify_from_lc_2 a LowCardinality(UInt32) CODEC(NONE) ) ENGINE = MergeTree ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_modify_from_lc_1 SELECT number, number FROM numbers(100000); INSERT 
INTO t_modify_from_lc_2 SELECT number, number FROM numbers(100000); diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 2fe0943745d..0c228c13f19 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,4 +1,4 @@ --- Tags: no-upgrade-check, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings drop table if exists test_02381; create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; diff --git a/tests/queries/0_stateless/02383_join_and_filtering_set.reference b/tests/queries/0_stateless/02383_join_and_filtering_set.reference index 2ad282ca07f..822d76aafc6 100644 --- a/tests/queries/0_stateless/02383_join_and_filtering_set.reference +++ b/tests/queries/0_stateless/02383_join_and_filtering_set.reference @@ -8,3 +8,12 @@ Ok Ok Ok Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02383_join_and_filtering_set.sh b/tests/queries/0_stateless/02383_join_and_filtering_set.sh index 3356be58ff7..3a6d60811c9 100755 --- a/tests/queries/0_stateless/02383_join_and_filtering_set.sh +++ b/tests/queries/0_stateless/02383_join_and_filtering_set.sh @@ -10,6 +10,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -mn -q """ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + CREATE TABLE t1 (x UInt64, y UInt64) ENGINE = MergeTree ORDER BY y AS SELECT sipHash64(number, 't1_x') % 100 AS x, sipHash64(number, 't1_y') % 100 AS y FROM numbers(100); @@ -17,6 +20,26 @@ CREATE TABLE t2 (x UInt64, y UInt64) ENGINE = MergeTree ORDER BY y AS SELECT sipHash64(number, 't2_x') % 100 AS x, sipHash64(number, 't2_y') % 100 AS y FROM numbers(100); """ +# Arguments: +# - Query result +# - Processor name +# - Expected description +# - Check first occurrence +function match_description() { + +QUERY_RESULT=$1 +PROCESSOR_NAME=$2 +EXPECTED_DESCRIPTION=$3 +CHECK_FIRST_OCCURRENCE=${4:-true} + +SED_EXPR="/$PROCESSOR_NAME/{ n; s/^[ \t]*Description: //; p" +[ $CHECK_FIRST_OCCURRENCE = true ] && SED_EXPR+="; q }" || SED_EXPR+=" }" + +DESC=$(sed -n "$SED_EXPR" <<< "$QUERY_RESULT") +[[ "$DESC" == "$EXPECTED_DESCRIPTION" ]] && echo "Ok" || echo "Fail: ReadHeadBalancedProcessor description '$DESC' != '$EXPECTED_DESCRIPTION' " + +} + # Arguments: # - value of max_rows_in_set_to_optimize_join # - join kind @@ -37,10 +60,20 @@ RES=$( EXPECTED_PIPELINE_STEPS=$4 RES=$( $CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join=${PARAM_VALUE} --join_algorithm='full_sorting_merge' \ - -q "EXPLAIN PIPELINE SELECT count() FROM t1 ${JOIN_KIND} JOIN t2 ON t1.x = t2.x" \ - | grep -o -e ReadHeadBalancedProcessor -e FilterBySetOnTheFlyTransform -e CreatingSetsOnTheFlyTransform | wc -l + -q "EXPLAIN PIPELINE SELECT count() FROM t1 ${JOIN_KIND} JOIN t2 ON t1.x = t2.x" ) -[ "$RES" -eq "$EXPECTED_PIPELINE_STEPS" ] && echo "Ok" || echo "Fail: $RES != $EXPECTED_PIPELINE_STEPS" + +# Count match +COUNT=$(echo "$RES" | grep -o -e ReadHeadBalancedProcessor -e FilterBySetOnTheFlyTransform -e CreatingSetsOnTheFlyTransform | wc -l) +[ "$COUNT" -eq "$EXPECTED_PIPELINE_STEPS" ] && echo "Ok" || echo "Fail: $COUNT != $EXPECTED_PIPELINE_STEPS" + +# Description matchers +if [ "$EXPECTED_PIPELINE_STEPS" -ne 0 ]; then + match_description "$RES" 
'ReadHeadBalancedProcessor' 'Reads rows from two streams evenly' + match_description "$RES" 'FilterBySetOnTheFlyTransform' "Filter rows using other join table side\'s set" + match_description "$RES" 'CreatingSetsOnTheFlyTransform' 'Create set and filter Left joined stream +Create set and filter Right joined stream' false +fi } diff --git a/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql b/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql index ac43c12afc0..018f6b2cf4f 100644 --- a/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql +++ b/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql @@ -89,7 +89,7 @@ SOURCE(CLICKHOUSE(TABLE test_table_string)) LAYOUT(SPARSE_HASHED(SHARDS 10)) LIFETIME(0); -SYSTEM RELOAD DICTIONARY test_dictionary_10_shards_string; -- { serverError CANNOT_PARSE_TEXT } +SYSTEM RELOAD DICTIONARY test_dictionary_10_shards_string; DROP DICTIONARY test_dictionary_10_shards_string; diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh index 39e513f6be4..48efc98f7c7 100755 --- a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, disabled +# Tags: race, zookeeper, no-parallel, disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference index d9fac433189..0409c48f846 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.reference +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -16,7 +16,7 @@ ExpressionTransform × 4 (Expression) ExpressionTransform × 4 (ReadFromMergeTree) - MergeTreeInOrder × 4 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 4 0 → 1 (ReadFromRemote) select a from remote(test_cluster_two_shards, currentDatabase(), t) group by a order by a limit 5 offset 100500; 100500 @@ -47,7 +47,7 @@ ExpressionTransform × 4 (Expression) ExpressionTransform × 4 (ReadFromMergeTree) - MergeTreeInOrder × 4 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 4 0 → 1 (ReadFromRemote) (ReadFromRemote) select a from remote(test_cluster_two_shards, currentDatabase(), dist_t) group by a order by a limit 5 offset 100500; @@ -84,7 +84,7 @@ ExpressionTransform (Expression) ExpressionTransform × 4 (ReadFromMergeTree) - MergeTreeInOrder × 4 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 4 0 → 1 (ReadFromRemote) select a, count() from dist_t_different_dbs group by a order by a limit 5 offset 500; 500 2000 @@ -118,8 +118,7 @@ ExpressionTransform MergingAggregatedBucketTransform × 4 Resize 1 → 4 GroupingAggregatedTransform 3 → 1 - (Union) - (ReadFromRemoteParallelReplicas) + (ReadFromRemoteParallelReplicas) select a, count() from pr_t group by a order by a limit 5 offset 500; 500 1000 501 1000 diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index 23c29ca8d93..5e017e79309 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -57,6 +57,8 @@ select a, count() from dist_t_different_dbs group by a, b order by a limit 5 off -- { echoOff } 
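The match_description helper added above pulls the Description: line that follows a given processor name out of EXPLAIN PIPELINE output. A standalone sketch of the same sed idiom, assuming the t1/t2 tables created earlier in that test and a running server; the processor name and the setting value below are just examples:

# Sketch of the sed idiom: on the line matching the processor name, advance to the next
# line (n), strip the leading "Description: " prefix, print it (p) and stop after the
# first occurrence (q).
RES=$($CLICKHOUSE_CLIENT --join_algorithm='full_sorting_merge' --max_rows_in_set_to_optimize_join=100000 \
    -q "EXPLAIN PIPELINE SELECT count() FROM t1 INNER JOIN t2 ON t1.x = t2.x")
sed -n "/FilterBySetOnTheFlyTransform/{ n; s/^[ \t]*Description: //; p; q }" <<< "$RES"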
-- +drop table if exists pr_t; + create table pr_t(a UInt64, b UInt64) engine=MergeTree order by a; insert into pr_t select number % 1000, number % 1000 from numbers_mt(1e6); diff --git a/tests/queries/0_stateless/02413_replace_partition_zero_copy.reference b/tests/queries/0_stateless/02413_replace_partition_zero_copy.reference new file mode 100644 index 00000000000..d755a4551f1 --- /dev/null +++ b/tests/queries/0_stateless/02413_replace_partition_zero_copy.reference @@ -0,0 +1,4 @@ +1 +1 +6 0 +12 0 diff --git a/tests/queries/0_stateless/02413_replace_partition_zero_copy.sql b/tests/queries/0_stateless/02413_replace_partition_zero_copy.sql new file mode 100644 index 00000000000..80161e7ab2a --- /dev/null +++ b/tests/queries/0_stateless/02413_replace_partition_zero_copy.sql @@ -0,0 +1,35 @@ +-- Tags: no-replicated-database, no-fasttest +-- Tag no-replicated-database: different number of replicas + +create table src1 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/src', '1') order by tuple() settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1; +create table src2 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/src', '2') order by tuple() settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1; +create table dst1 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/dst', '1') order by tuple() settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1; +create table dst2 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/dst', '2') order by tuple() settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1; + +-- FIXME zero-copy locks may remain in ZooKeeper forever if we failed to insert a part. +-- Probably that's why we have to replace repsistent lock with ephemeral sometimes. +-- See also "Replacing persistent lock with ephemeral for path {}. 
It can happen only in case of local part loss" +-- in StorageReplicatedMergeTree::createZeroCopyLockNode +set insert_keeper_fault_injection_probability=0; + +insert into src1 values(1); +insert into src2 values(2); +system sync replica src1 lightweight; + +alter table dst1 replace partition id 'all' from src1; +system sync replica dst2; + +select count() != 0 from dst1; +select count() != 0 from dst2; + +-- ensure that locks exist and they are not ephemeral +set allow_unrestricted_reads_from_keeper=1; +select count(), sum(ephemeralOwner) from system.zookeeper where path like '/clickhouse/zero_copy/zero_copy_s3/' || + (select value from system.zookeeper where path='/test/02413/'||currentDatabase()||'/dst' and name='table_shared_id') || '/%'; + +-- check the same for move partition +alter table dst2 move partition id 'all' to table src2; +system sync replica src1; + +select count(), sum(ephemeralOwner) from system.zookeeper where path like '/clickhouse/zero_copy/zero_copy_s3/' || + (select value from system.zookeeper where path='/test/02413/'||currentDatabase()||'/src' and name='table_shared_id') || '/%'; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 10701dbfc63..dd843058281 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -229,6 +229,7 @@ dateName dateTime64ToSnowflake dateTimeToSnowflake dateTrunc +decodeHTMLComponent decodeURLComponent decodeURLFormComponent decodeXMLComponent @@ -238,10 +239,6 @@ defaultValueOfArgumentType defaultValueOfTypeName degrees demangle -detectCharset -detectLanguageUnknown -detectProgrammingLanguage -detectTonality divide dotProduct dumpColumnStructure @@ -306,6 +303,7 @@ formatRowNoNewline fragment fromModifiedJulianDay fromModifiedJulianDayOrNull +fromUTCTimestamp fromUnixTimestamp fromUnixTimestamp64Micro fromUnixTimestamp64Milli @@ -671,6 +669,8 @@ sqrt startsWith subBitmap substring +substringIndex +substringIndexUTF8 substringUTF8 subtractDays subtractHours @@ -851,6 +851,7 @@ toUInt8 toUInt8OrDefault toUInt8OrNull toUInt8OrZero +toUTCTimestamp toUUID toUUIDOrDefault toUUIDOrNull @@ -883,11 +884,18 @@ tumble tumbleEnd tumbleStart tuple +tupleConcat tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql index ed95c06d016..4f40da6c626 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql @@ -15,5 +15,7 @@ AND name NOT IN ( 'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3', 'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo', 'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD', - 'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test + 'lemmatize', 'tokenize', 'stem', 'synonyms', + 'detectCharset', 'detectLanguageUnknown', 
'detectProgrammingLanguage', 'detectTonality' + -- these functions are not enabled in fast test ) ORDER BY name; diff --git a/tests/queries/0_stateless/02417_load_marks_async.sh b/tests/queries/0_stateless/02417_load_marks_async.sh index a5cbcd08f75..72b35a565df 100755 --- a/tests/queries/0_stateless/02417_load_marks_async.sh +++ b/tests/queries/0_stateless/02417_load_marks_async.sh @@ -21,7 +21,7 @@ n8 UInt64, n9 UInt64 ) ENGINE = MergeTree -ORDER BY n0 SETTINGS min_bytes_for_wide_part = 1;" +ORDER BY n0 SETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192, index_granularity_bytes = '10Mi';" ${CLICKHOUSE_CLIENT} -q "INSERT INTO test select number, number % 3, number % 5, number % 10, number % 13, number % 15, number % 17, number % 18, number % 22, number % 25 from numbers(1000000)" ${CLICKHOUSE_CLIENT} -q "SYSTEM STOP MERGES test" diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference index 98827438920..a0689a0a090 100644 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference @@ -3,8 +3,8 @@ {"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} 1 ===2=== -{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} -{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} 3 2 ===3=== diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh index edc3d06e5bf..5a1e33a8459 100755 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh @@ -20,7 +20,9 @@ function insert() -H "tracestate: $4" \ "${CLICKHOUSE_URL}" \ --data @- - ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH DISTRIBUTED ${CLICKHOUSE_DATABASE}.dist_opentelemetry" + + # disable probabilistic tracing to avoid stealing the trace context + ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=0 -q "SYSTEM FLUSH DISTRIBUTED ${CLICKHOUSE_DATABASE}.dist_opentelemetry" } function check_span() @@ -69,6 +71,8 @@ DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.local_opentelemetry; CREATE TABLE ${CLICKHOUSE_DATABASE}.dist_opentelemetry (key UInt64) Engine=Distributed('test_cluster_two_shards_localhost', ${CLICKHOUSE_DATABASE}, local_opentelemetry, key % 2); CREATE TABLE ${CLICKHOUSE_DATABASE}.local_opentelemetry (key UInt64) Engine=MergeTree ORDER BY key; + +SYSTEM STOP DISTRIBUTED SENDS ${CLICKHOUSE_DATABASE}.dist_opentelemetry; " # diff --git a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference 
b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference index 71c9a23879f..c13142dd26a 100644 --- a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference +++ b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference @@ -11,3 +11,16 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql index f5d182be3e3..fde893626c1 100644 --- a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql +++ b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql @@ -1,10 +1,30 @@ DROP TABLE IF EXISTS dtest; -SELECT count() == 0 FROM (SELECT '33.3' :: Decimal(9, 1) AS a WHERE a IN ('33.33' :: Decimal(9, 2))); CREATE TABLE dtest ( `a` Decimal(18, 0), `b` Decimal(18, 1), `c` Decimal(36, 0) ) ENGINE = Memory; INSERT INTO dtest VALUES ('33', '44.4', '35'); +SELECT count() == 0 FROM (SELECT '33.3' :: Decimal(9, 1) AS a WHERE a IN ('33.33' :: Decimal(9, 2))); + +SELECT count() == 0 FROM dtest WHERE a IN toDecimal32('33.3000', 4); +SELECT count() == 0 FROM dtest WHERE a IN toDecimal64('33.3000', 4); +SELECT count() == 0 FROM dtest WHERE a IN toDecimal128('33.3000', 4); +SELECT count() == 0 FROM dtest WHERE a IN toDecimal256('33.3000', 4); + +SELECT count() == 0 FROM dtest WHERE b IN toDecimal32('44.4000', 0); +SELECT count() == 0 FROM dtest WHERE b IN toDecimal64('44.4000', 0); +SELECT count() == 0 FROM dtest WHERE b IN toDecimal128('44.4000', 0); +SELECT count() == 0 FROM dtest WHERE b IN toDecimal256('44.4000', 0); + +SELECT count() == 1 FROM dtest WHERE b IN toDecimal32('44.4000', 4); +SELECT count() == 1 FROM dtest WHERE b IN toDecimal64('44.4000', 4); +SELECT count() == 1 FROM dtest WHERE b IN toDecimal128('44.4000', 4); +SELECT count() == 1 FROM dtest WHERE b IN toDecimal256('44.4000', 4); + +SET allow_experimental_analyzer = 1; + +SELECT count() == 0 FROM (SELECT '33.3' :: Decimal(9, 1) AS a WHERE a IN ('33.33' :: Decimal(9, 2))); + SELECT count() == 0 FROM dtest WHERE a IN toDecimal32('33.3000', 4); SELECT count() == 0 FROM dtest WHERE a IN toDecimal64('33.3000', 4); SELECT count() == 0 FROM dtest WHERE a IN toDecimal128('33.3000', 4); diff --git a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference index a89ce339f6c..9f725a55439 100644 --- a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference +++ b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference @@ -5,19 +5,13 @@ tx21 3 UNKNOWN_TABLE concurrent_insert 2 -all_1_1_1 0 -all_2_2_1 0 -all_3_3_1 0 -all_4_4_1 0 all_5_5_0 1 -all_6_6_1 0 concurrent_drop_part_before SERIALIZATION_ERROR INVALID_TRANSACTION 1 3 all_1_1_0 1 -all_2_2_1 0 all_3_3_0 1 read_from_snapshot tx51 3 @@ -28,15 +22,9 @@ tx51 3 concurrent_drop_part_after NO_SUCH_DATA_PART INVALID_TRANSACTION -all_1_1_1 0 -all_2_2_1 0 -all_3_3_1 0 NewPart all_1_1_0 -NewPart all_1_1_1 NewPart all_2_2_0 -NewPart all_2_2_1 NewPart all_3_3_0 -NewPart all_3_3_1 concurrent_truncate_notx_after tx71 3 tx71 0 diff --git a/tests/queries/0_stateless/02423_insert_summary_behaviour.sh b/tests/queries/0_stateless/02423_insert_summary_behaviour.sh index 6c75efa5150..b184d9ccf47 100755 --- a/tests/queries/0_stateless/02423_insert_summary_behaviour.sh +++ b/tests/queries/0_stateless/02423_insert_summary_behaviour.sh @@ -11,11 +11,11 @@ $CLICKHOUSE_CLIENT -q "CREATE 
MATERIALIZED VIEW floats_to_target TO target_1 AS $CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target_2 TO target_2 AS SELECT * FROM floats, numbers(2) n" echo "No materialized views" -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' echo "With materialized views" -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference index 378b7d8cec4..66c3d28dfc0 100644 --- a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference @@ -19,3 +19,24 @@ 1 1 1 +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql 
b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql index 579f468ee54..a84cb5572ba 100644 --- a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql @@ -1,3 +1,8 @@ +DROP TABLE IF EXISTS decimal_in_float_test; + +CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory; +INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); + SELECT toDecimal32(1.555,3) IN (1.5551); SELECT toDecimal32(1.555,3) IN (1.5551,1.555); SELECT toDecimal32(1.555,3) IN (1.5551,1.555000); @@ -18,10 +23,36 @@ SELECT toDecimal256(1.555,3) IN (1.5551,1.555); SELECT toDecimal256(1.555,3) IN (1.5551,1.555000); SELECT toDecimal256(1.555,3) IN (1.550,1.5); -DROP TABLE IF EXISTS decimal_in_float_test; -CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory; -INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0); +SELECT count() == 1 FROM decimal_in_float_test WHERE a NOT IN (33.333); +SELECT count() == 1 FROM decimal_in_float_test WHERE b IN (44.44); +SELECT count() == 1 FROM decimal_in_float_test WHERE b NOT IN (44.4,44.444); + +SET allow_experimental_analyzer = 1; + + +SELECT toDecimal32(1.555,3) IN (1.5551); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal32(1.555,3) IN (1.550,1.5); + +SELECT toDecimal64(1.555,3) IN (1.5551); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal64(1.555,3) IN (1.550,1.5); + +SELECT toDecimal128(1.555,3) IN (1.5551); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal128(1.555,3) IN (1.550,1.5); + +SELECT toDecimal256(1.555,3) IN (1.5551); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal256(1.555,3) IN (1.550,1.5); + SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0); diff --git a/tests/queries/0_stateless/02428_delete_with_settings.sql b/tests/queries/0_stateless/02428_delete_with_settings.sql index 071a3f74184..618c08608fc 100644 --- a/tests/queries/0_stateless/02428_delete_with_settings.sql +++ b/tests/queries/0_stateless/02428_delete_with_settings.sql @@ -1,5 +1,5 @@ drop table if exists test; -create table test (id Int32, key String) engine=MergeTree() order by tuple(); +create table test (id Int32, key String) engine=MergeTree() order by tuple() settings index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test select number, toString(number) from numbers(1000000); delete from test where id % 2 = 0 SETTINGS mutations_sync=0; select count() from test; diff --git a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql index 33b0ea4b818..091fbbe1711 100644 --- a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql +++ b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql @@ -1,7 +1,7 @@ -- From https://github.com/ClickHouse/ClickHouse/issues/41814 drop table if exists test; -create table test(a UInt64, m UInt64, d DateTime) engine MergeTree partition by toYYYYMM(d) order by (a, m, d); 
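The 02428 additions above re-run the Decimal-vs-float-literal IN checks under the experimental analyzer and extend the reference file to match. A minimal sketch of the behaviour being pinned down, assuming a running server; the expected values in the comments are taken from the new reference lines:

# Minimal sketch: a Decimal32 value with scale 3 is found in an IN list when a float
# literal matches it by value, regardless of trailing zeros, and is not found otherwise.
$CLICKHOUSE_CLIENT -n -q "
    SET allow_experimental_analyzer = 1;
    -- expected: 1
    SELECT toDecimal32(1.555, 3) IN (1.5551, 1.555000);
    -- expected: 0
    SELECT toDecimal32(1.555, 3) IN (1.550, 1.5);
"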
+create table test(a UInt64, m UInt64, d DateTime) engine MergeTree partition by toYYYYMM(d) order by (a, m, d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test select number, number, '2022-01-01 00:00:00' from numbers(1000000); @@ -12,7 +12,7 @@ drop table test; -- From https://github.com/ClickHouse/ClickHouse/issues/34063 drop table if exists test_null_filter; -create table test_null_filter(key UInt64, value UInt32) engine MergeTree order by key; +create table test_null_filter(key UInt64, value UInt32) engine MergeTree order by key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test_null_filter select number, number from numbers(10000000); diff --git a/tests/queries/0_stateless/02429_low_cardinality_trash.sh b/tests/queries/0_stateless/02429_low_cardinality_trash.sh index 91618cb2796..e115ee0824e 100755 --- a/tests/queries/0_stateless/02429_low_cardinality_trash.sh +++ b/tests/queries/0_stateless/02429_low_cardinality_trash.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-upgrade-check +# Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh index 5e2da509314..f85aaed7716 100755 --- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-settings +# Tags: no-random-settings, no-asan, no-msan, no-tsan, no-debug # shellcheck disable=SC2009 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01306_benchmark_json.reference b/tests/queries/0_stateless/02436_system_zookeeper_context.reference similarity index 100% rename from tests/queries/0_stateless/01306_benchmark_json.reference rename to tests/queries/0_stateless/02436_system_zookeeper_context.reference diff --git a/tests/queries/0_stateless/02436_system_zookeeper_context.sql b/tests/queries/0_stateless/02436_system_zookeeper_context.sql new file mode 100644 index 00000000000..ae44405e76e --- /dev/null +++ b/tests/queries/0_stateless/02436_system_zookeeper_context.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS mt; +create table mt (n int, s String) engine=MergeTree order by n; +insert into mt values (1, ''); +set allow_nondeterministic_mutations=1; +alter table mt update s = (select toString(groupArray((*,))) from system.zookeeper where path='/') where n=1 settings mutations_sync=2; +select distinct n from mt; +DROP TABLE mt; diff --git a/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh index ca5e1245046..44076aeba18 100755 --- a/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh +++ b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh @@ -55,7 +55,7 @@ TIMEOUT=15 timeout $TIMEOUT bash -c thread_ddl 2>&1| grep -Fa "Exception: " | grep -Fv -e "TABLE_IS_DROPPED" -e "UNKNOWN_TABLE" -e "DATABASE_NOT_EMPTY" & timeout $TIMEOUT bash -c thread_insert 2> /dev/null & -timeout $TIMEOUT bash -c thread_restart 2>&1| grep -Fa "Exception: " | grep -Fv -e "is currently dropped or renamed" & +timeout $TIMEOUT bash -c thread_restart 2>&1| grep -Fa "Exception: " | grep -Fv -e "is currently dropped or renamed" -e "is being dropped or detached" & wait diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql 
b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql index dfdbbf5d597..1d01fde56d6 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -24,6 +24,11 @@ system flush logs; select * from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' and op_num in ('List', 'SimpleList', 'FilteredList') and path not like '%/block_numbers/1' and path not like '%/block_numbers/123' - and event_time >= now() - interval 1 minute; + and event_time >= now() - interval 1 minute + -- avoid race with tests like 02311_system_zookeeper_insert + and (query_id is null or query_id='' or query_id in + (select query_id from system.query_log + where event_time >= now() - interval 1 minute and current_database=currentDatabase()) + ); drop table rmt; diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference new file mode 100644 index 00000000000..3f1b907d51b --- /dev/null +++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference @@ -0,0 +1,5 @@ +default begin inserts +default end inserts +default threads finished +30 465 +30 465 diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh new file mode 100755 index 00000000000..d72d771a150 --- /dev/null +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib + + +$CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS alter_table0; + DROP TABLE IF EXISTS alter_table1; + + CREATE TABLE alter_table0 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a; + CREATE TABLE alter_table1 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a; +" || exit 1 + +function thread_detach() +{ + while true; do + $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) DETACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||: + done +} +function thread_attach() +{ + while true; do + $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) ATTACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||: + done +} + +insert_type=$(($RANDOM % 3)) + +engine=$($CLICKHOUSE_CLIENT -q "SELECT engine FROM system.tables WHERE database=currentDatabase() AND table='alter_table0'") +if [[ "$engine" == "ReplicatedMergeTree" ]]; then + insert_type=$(($RANDOM % 2)) +fi +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'insert_type $insert_type' FORMAT Null" + +function insert() +{ + # Fault injection may lead to duplicates + if [[ "$insert_type" -eq 0 ]]; then + $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null + elif [[ "$insert_type" -eq 1 ]]; then + $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $1, $1" 2>/dev/null + else + # It may reproduce something interesting: if the insert status is unknown (due to fault injection in retries) + # and the part was committed locally but not 
in zk, then it will be active and DETACH may detach it. + # And we will ATTACH it later. But the next INSERT attempt will not be deduplicated because the first one failed. + # So we will get duplicates. + $CLICKHOUSE_CLIENT --insert_deduplication_token=$1 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null + fi +} + +thread_detach & PID_1=$! +thread_attach & PID_2=$! +thread_detach & PID_3=$! +thread_attach & PID_4=$! + +function do_inserts() +{ + for i in {1..30}; do + while ! insert $i; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done + done +} + +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'begin inserts'" +do_inserts 2>&1| grep -Fa "Exception: " | grep -Fv "was cancelled by concurrent ALTER PARTITION" +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'end inserts'" + +kill -TERM $PID_1 && kill -TERM $PID_2 && kill -TERM $PID_3 && kill -TERM $PID_4 +wait + +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'threads finished'" +wait_for_queries_to_finish + +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +query_with_retry "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" + +if [[ "$engine" == "ReplicatedMergeTree" ]]; then + # ReplicatedMergeTree may duplicate data on ATTACH PARTITION (when one replica has a merged part and another replica has source parts only) + $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL DEDUPLICATE" + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +fi + +$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table0" +$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table1" + +$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table0" +$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table1" diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index abcf1bf4c5b..03c43843d3a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-upgrade-check +# Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Test that running distributed query and cancel it ASAP, # this can trigger a hung/deadlock in ProcessorList. -for i in {1..100}; do +for i in {1..50}; do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & while :; do diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.sh b/tests/queries/0_stateless/02457_insert_select_progress_http.sh index 656ab3dc403..ae62ee4b77e 100755 --- a/tests/queries/0_stateless/02457_insert_select_progress_http.sh +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "insert into function null('_ Int') select * from numbers(5) settings max_block_size=1" -v |& { - grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: + grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: | sed 's/,\"elapsed_ns[^}]*//' } diff --git a/tests/queries/0_stateless/02457_morton_coding.sql b/tests/queries/0_stateless/02457_morton_coding.sql index 4fc26f255f4..955cb2e053b 100644 --- a/tests/queries/0_stateless/02457_morton_coding.sql +++ b/tests/queries/0_stateless/02457_morton_coding.sql @@ -11,7 +11,7 @@ create table morton_numbers_02457( n8 UInt8 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; SELECT '----- CONST -----'; select mortonEncode(1,2,3,4); @@ -45,7 +45,7 @@ create table morton_numbers_1_02457( n8 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_1_02457 select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) @@ -80,7 +80,7 @@ create table morton_numbers_2_02457( n4 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_2_02457 select untuple(mortonDecode(4, mortonEncode(n1, n2, n3, n4))) @@ -114,7 +114,7 @@ create table morton_numbers_3_02457( n2 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_3_02457 select untuple(mortonDecode(2, mortonEncode(n1, n2))) diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql index 5aeb1f380be..c95205769d2 100644 --- a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql @@ -20,7 +20,7 @@ create table morton_numbers_mask_02457( n4 UInt8 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_02457 select n1.number, n2.number, n3.number, n4.number @@ -37,7 +37,7 @@ create table morton_numbers_mask_1_02457( n4 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_1_02457 select untuple(mortonDecode((1,2,1,2), mortonEncode((1,2,1,2), n1, n2, n3, n4))) @@ -64,7 +64,7 @@ create table morton_numbers_mask_02457( n2 UInt8 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_02457 select n1.number, n2.number @@ -77,7 +77,7 @@ create table morton_numbers_mask_2_02457( n2 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_2_02457 select untuple(mortonDecode((1,4), mortonEncode((1,4), n1, n2))) @@ -105,7 +105,7 @@ create table morton_numbers_mask_02457( n3 UInt8, ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_02457 select n1.number, n2.number, n3.number @@ -120,7 +120,7 @@ create table morton_numbers_mask_3_02457( n3 UInt64 ) Engine=MergeTree() - 
ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_3_02457 select untuple(mortonDecode((1,1,2), mortonEncode((1,1,2), n1, n2, n3))) diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index 696eb01ff7e..fdc64a8dba8 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -1,187 +1,30 @@ #!/usr/bin/env python3 -import socket -import os -import uuid import json +import os +import sys -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def readStrict(s, size=1): - res = bytearray() - while size: - cur = s.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - -def readUInt(s, size=1): - res = readStrict(s, size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - -def readUInt8(s): - return readUInt(s) - - -def readUInt16(s): - return readUInt(s, 2) - - -def readUInt32(s): - return readUInt(s, 4) - - -def readUInt64(s): - return readUInt(s, 8) - - -def readVarUInt(s): - x = 0 - for i in range(9): - byte = readStrict(s)[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - -def readStringBinary(s): - size = readVarUInt(s) - s = readStrict(s, size) - return s.decode("utf-8") - - -def sendHello(s): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary("simple native protocol", ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary("default", ba) # user - writeStringBinary("", ba) # pwd - s.sendall(ba) - - -def receiveHello(s): - p_type = readVarUInt(s) - assert p_type == 0 # Hello - server_name = readStringBinary(s) - # print("Server name: ", server_name) - server_version_major = readVarUInt(s) - # print("Major: ", server_version_major) - server_version_minor = readVarUInt(s) - # print("Minor: ", server_version_minor) - server_revision = readVarUInt(s) - # print("Revision: ", server_revision) - server_timezone = readStringBinary(s) - # print("Timezone: ", server_timezone) - server_display_name = readStringBinary(s) - # print("Display name: ", server_display_name) - server_version_patch = readVarUInt(s) - # print("Version patch: ", server_version_patch) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary("client_name", ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # 
quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry - - -def sendQuery(s, query): - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - writeStringBinary("", ba) # No settings - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - s.sendall(ba) - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def sendEmptyBlock(s): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - s.sendall(ba) - - -def assertPacket(packet, expected): - assert packet == expected, packet +from tcp_client import TCPClient class Progress: - def __init__(self): + def __init__( + self, + read_rows=0, + read_bytes=0, + total_rows_to_read=0, + written_rows=0, + written_bytes=0, + ): # NOTE: this is done in ctor to initialize __dict__ - self.read_rows = 0 - self.read_bytes = 0 - self.total_rows_to_read = 0 - self.written_rows = 0 - self.written_bytes = 0 + self.read_rows = read_rows + self.read_bytes = read_bytes + self.total_rows_to_read = total_rows_to_read + self.written_rows = written_rows + self.written_bytes = written_bytes def __str__(self): return json.dumps(self.__dict__) @@ -194,13 +37,6 @@ class Progress: self.written_bytes += b.written_bytes return self - def readPacket(self, s): - self.read_rows += readVarUInt(s) - self.read_bytes += readVarUInt(s) - self.total_rows_to_read += readVarUInt(s) - self.written_rows += readVarUInt(s) - self.written_bytes += readVarUInt(s) - def __bool__(self): return ( self.read_rows > 0 @@ -211,52 +47,25 @@ class Progress: ) -def readProgress(s): - packet_type = readVarUInt(s) - if packet_type == 2: # Exception - raise RuntimeError(readException(s)) - - if packet_type == 5: # End stream - return None - - assertPacket(packet_type, 3) # Progress - - progress = Progress() - progress.readPacket(s) - return progress - - -def readException(s): - code = readUInt32(s) - name = readStringBinary(s) - text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) - - def main(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) + with TCPClient() as client: # For 1 second sleep and 1000ms of interactive_delay we definitelly should have non zero progress packet. # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. 
- sendQuery( - s, + client.sendQuery( "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000", ) # external tables - sendEmptyBlock(s) + client.sendEmptyBlock() summary_progress = Progress() non_empty_progress_packets = 0 while True: - progress = readProgress(s) - if progress is None: + progress_info = client.readProgress() + if progress_info is None: break + + progress = Progress(*progress_info) summary_progress += progress if progress: non_empty_progress_packets += 1 @@ -267,8 +76,6 @@ def main(): # - 1 or 2 for each SELECT block assert non_empty_progress_packets in (3, 4), f"{non_empty_progress_packets=:}" - s.close() - if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 842acf2b697..9941736107f 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -32,7 +32,7 @@ with client( ) client1.expect(prompt) client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'") - client1.expect("Progress: 5.00 rows, 30.00 B.*\)") + client1.expect("Progress: 5.00 rows, 10.00 B.*\)") client1.expect(prompt) # send Ctrl-C diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sh b/tests/queries/0_stateless/02473_optimize_old_parts.sh index 0c2dd04d024..b563bc31b39 100755 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sh +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sh @@ -61,7 +61,7 @@ INSERT INTO test_with_merge SELECT 3;" wait_for_number_of_parts 'test_with_merge' 1 100 $CLICKHOUSE_CLIENT -nmq " -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; DROP TABLE test_with_merge;" diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index ca83253eaf8..0e80a21bf46 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -12,18 +12,21 @@ sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient +table_engine = sys.argv[1] + client = ClickHouseClient() # test table without partition client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") -client.query( - """ +create_query = f""" CREATE TABLE t_async_insert_dedup_no_part ( KeyID UInt32 -) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') +) Engine = {table_engine}('/clickhouse/tables/{{shard}}/{{database}}/t_async_insert_dedup', '{{replica}}') ORDER BY (KeyID) """ -) + +client.query(create_query) + client.query( "insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", @@ -101,22 +104,22 @@ def fetch_and_insert_data(q, client): # main process client.query("DROP TABLE IF EXISTS t_async_insert_dedup SYNC") -client.query( - """ +create_query = f""" CREATE TABLE t_async_insert_dedup ( EventDate DateTime, KeyID UInt32 -) Engine = 
ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') +) Engine = {table_engine}('/clickhouse/tables/{{shard}}/{{database}}/t_async_insert_dedup', '{{replica}}') PARTITION BY toYYYYMM(EventDate) ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1 """ -) + +client.query(create_query) q = queue.Queue(100) total_number = 10000 use_token = False -if sys.argv[-1] == "token": +if len(sys.argv) > 3 and sys.argv[2] == "token": use_token = True gen = Thread(target=generate_data, args=[q, total_number, use_token]) @@ -158,13 +161,14 @@ while True: break result = client.query( - "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" + "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" ) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}") + result = client.query( - "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" + "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" ) result = int(result.split()[0]) if result <= 0: diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.sh b/tests/queries/0_stateless/02481_async_insert_dedup.sh index e7cb5c33bf5..0fe06e6ab58 100755 --- a/tests/queries/0_stateless/02481_async_insert_dedup.sh +++ b/tests/queries/0_stateless/02481_async_insert_dedup.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/02481_async_insert_dedup.python +python3 "$CURDIR"/02481_async_insert_dedup.python ReplicatedMergeTree diff --git a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh index 8ef6eecda24..bb2d07066a5 100755 --- a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh +++ b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/02481_async_insert_dedup.python token +python3 "$CURDIR"/02481_async_insert_dedup.python ReplicatedMergeTree token diff --git a/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql b/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql index 39fc751f331..1c2123a99d5 100644 --- a/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql +++ b/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS 02481_mergetree; DROP TABLE IF EXISTS 02481_merge; -CREATE TABLE 02481_mergetree(x UInt64, y UInt64, arr Array(String)) ENGINE = MergeTree ORDER BY x SAMPLE BY x; +CREATE TABLE 02481_mergetree(x UInt64, y UInt64, arr Array(String)) ENGINE = MergeTree ORDER BY x SAMPLE BY x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE 02481_merge(x UInt64, y UInt64, arr Array(String)) ENGINE = Merge(currentDatabase(), '^(02481_mergetree)$'); diff --git a/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql index 91402bbed60..021a55ef2e8 100644 --- a/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql +++ b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql @@ -10,7 +10,7 @@ CREATE TABLE gen ) ENGINE = GenerateRandom; -CREATE TABLE github_events AS gen ENGINE=MergeTree ORDER BY (event_type, repo_name, created_at); +CREATE TABLE github_events AS gen ENGINE=MergeTree ORDER BY (event_type, repo_name, created_at) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO github_events SELECT * FROM gen LIMIT 100000; diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 4d588dabeb9..fe3cee1359e 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -5,23 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" 
== 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} - $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS load_parts_refcounts SYNC; diff --git a/tests/queries/0_stateless/02483_elapsed_time.sh b/tests/queries/0_stateless/02483_elapsed_time.sh index e3b983129fb..fdb23d6da01 100755 --- a/tests/queries/0_stateless/02483_elapsed_time.sh +++ b/tests/queries/0_stateless/02483_elapsed_time.sh @@ -32,7 +32,7 @@ OK_QUERY_JSON=" WITH ( SELECT sleepEachRow(1.0) ) AS sub -SELECT * +SELECT *, sub FROM ( SELECT * @@ -50,7 +50,7 @@ WITH ( SELECT * FROM ( - SELECT * + SELECT *, sub FROM system.one ) FORMAT XML diff --git a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference index dfa09193761..ad541387510 100644 --- a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference +++ b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference @@ -3,33 +3,33 @@ -- The number of output streams is limited by max_streams_for_merge_tree_reading select sum(x) from t settings max_threads=32, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0; 49999995000000 -select * from (explain pipeline select sum(x) from t settings max_threads=32, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=32, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; Resize 16 → 32 StrictResize 16 → 16 - MergeTreeThread × 16 0 → 1 + MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 16 0 → 1 -- Without asynchronous_read, max_streams_for_merge_tree_reading limits max_streams * max_streams_to_max_threads_ratio select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0, max_streams_to_max_threads_ratio=8; 49999995000000 -select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; Resize 16 → 4 StrictResize 16 → 16 - MergeTreeThread × 16 0 → 1 + MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 16 0 → 1 -- With asynchronous_read, read in max_streams_for_merge_tree_reading async streams and resize to max_threads select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1; 49999995000000 -select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, 
allow_asynchronous_read_from_io_pool_for_merge_tree=1) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; Resize 4 → 4 StrictResize 4 → 4 Resize 16 → 4 - MergeTreeThread × 16 0 → 1 + MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 16 0 → 1 -- With asynchronous_read, read using max_streams * max_streams_to_max_threads_ratio async streams, resize to max_streams_for_merge_tree_reading outp[ut streams, resize to max_threads after aggregation select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8; 49999995000000 -select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; Resize 16 → 4 StrictResize 16 → 16 Resize 32 → 16 - MergeTreeThread × 32 0 → 1 + MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 32 0 → 1 -- For read-in-order, disable everything set query_plan_remove_redundant_sorting=0; -- to keep reading in order select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1; diff --git a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql index c8643b5c758..f2e81273f12 100644 --- a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql +++ b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql @@ -4,23 +4,26 @@ drop table if exists t; create table t (x UInt64) engine = MergeTree order by x; insert into t select number from numbers_mt(10000000) settings max_insert_threads=8; +set allow_prefetched_read_pool_for_remote_filesystem = 0; +set allow_prefetched_read_pool_for_local_filesystem = 0; + -- { echo } -- The number of output streams is limited by max_streams_for_merge_tree_reading select sum(x) from t settings max_threads=32, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0; -select * from (explain pipeline select sum(x) from t settings max_threads=32, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=32, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; -- Without asynchronous_read, max_streams_for_merge_tree_reading limits max_streams * max_streams_to_max_threads_ratio select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0, max_streams_to_max_threads_ratio=8; -select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0, max_streams_to_max_threads_ratio=8) where 
explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=0, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; -- With asynchronous_read, read in max_streams_for_merge_tree_reading async streams and resize to max_threads select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1; -select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; -- With asynchronous_read, read using max_streams * max_streams_to_max_threads_ratio async streams, resize to max_streams_for_merge_tree_reading outp[ut streams, resize to max_threads after aggregation select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8; -select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeThread%'; +select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeSelect%'; -- For read-in-order, disable everything set query_plan_remove_redundant_sorting=0; -- to keep reading in order diff --git a/tests/queries/0_stateless/02494_query_cache_query_log.reference b/tests/queries/0_stateless/02494_query_cache_query_log.reference new file mode 100644 index 00000000000..f9429064456 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_query_log.reference @@ -0,0 +1,12 @@ +-- Run a query with query cache not enabled +124437993 +QueryFinish SELECT 124437993; None +-- Run a query with query cache enabled +124437994 +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Write +-- Run the same query with query cache enabled +124437994 +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Write +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Read +-- Throw exception with query cache enabled +SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1; None diff --git a/tests/queries/0_stateless/02494_query_cache_query_log.sql b/tests/queries/0_stateless/02494_query_cache_query_log.sql new file mode 100644 index 00000000000..aedc39c4486 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_query_log.sql @@ -0,0 +1,67 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +-- DROP TABLE system.query_log; -- debugging + + + +SELECT '-- Run a query with query cache not enabled'; +SELECT 124437993; + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'None' +SELECT type, query, query_cache_usage +FROM system.query_log +WHERE 
current_database = currentDatabase() + AND query = 'SELECT 124437993;' + AND type = 'QueryFinish' +ORDER BY type, query_cache_usage; + + + +SELECT '-- Run a query with query cache enabled'; +SELECT 124437994 SETTINGS use_query_cache = 1; + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'Write' +SELECT type, query, query_cache_usage +FROM system.query_log +WHERE current_database = currentDatabase() + AND query = 'SELECT 124437994 SETTINGS use_query_cache = 1;' + AND type = 'QueryFinish' +ORDER BY type, query_cache_usage; + + + +SELECT '-- Run the same query with query cache enabled'; +SELECT 124437994 SETTINGS use_query_cache = 1; + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'Read' +SELECT type, query, query_cache_usage +FROM system.query_log +WHERE current_database = currentDatabase() + AND query = 'SELECT 124437994 SETTINGS use_query_cache = 1;' + AND type = 'QueryFinish' +ORDER BY type, query_cache_usage; + + + +SELECT '-- Throw exception with query cache enabled'; +SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'None' +SELECT query, query_cache_usage +FROM system.query_log +WHERE current_database = currentDatabase() + AND query = 'SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1;' + AND type = 'ExceptionWhileProcessing'; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql index b4504a55643..ffb2afe8ca4 100644 --- a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql @@ -72,6 +72,7 @@ SYSTEM SYNC REPLICA wikistat2; -- it doesn't make test flaky, rarely we will not delete the parts because of cleanup thread was slow. -- Such condition will lead to successful queries. 
+SET function_sleep_max_microseconds_per_block = 5000000; SELECT 0 FROM numbers(5) WHERE sleepEachRow(1) = 1; select sum(hits), count() from wikistat1 GROUP BY project, subproject, path settings optimize_use_projections = 1, force_optimize_projection = 1; diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index 91f6a9bb541..c111ed40a29 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -45,4 +45,11 @@ thread2 $TIMEOUT >/dev/null & wait -$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' \ No newline at end of file +for _ in {1..10} +do + # process list is cleaned after everything is sent to client + # so this check can be run before process list is cleaned + # to avoid spurious failures we retry the check couple of times + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break + sleep 1 +done \ No newline at end of file diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 2e049dbc936..763a7cc4286 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -477,3 +477,32 @@ Expression (Projection) ReadFromStorage (SystemNumbers) -- execute 1 +-- UNION ALL with DISTINCT => do _not_ remove DISTINCT +-- query +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +) +-- explain +Expression (Projection) + Distinct + Distinct (Preliminary DISTINCT) + Union + Expression ((Before ORDER BY + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemNumbers) + Expression (( + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 41744cc59f9..f07cdca4b5a 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -264,3 +264,15 @@ run_query "$query" echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT" query="select distinct count() from numbers(10) group by number" run_query "$query" + +echo "-- UNION ALL with DISTINCT => do _not_ remove DISTINCT" +query="SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +)" +run_query "$query" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index c9301c1f0a3..50ca5981cf1 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -479,3 +479,32 @@ Expression (Project names) ReadFromStorage (SystemNumbers) -- execute 1 +-- UNION ALL with DISTINCT => do _not_ remove DISTINCT +-- query +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT 
number + FROM numbers(2) +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Union + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) + Expression (( + ( + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 229c68bf8ec..63f912c6bff 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -22,7 +22,7 @@ SETTINGS min_bytes_for_wide_part = 0, type = cache, max_size = '128Mi', max_file_segment_size = '10Ki', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}', cache_on_write_operations = 1, enable_filesystem_query_cache_limit = 1, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference index 79871e3716c..de5f14ee1ff 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference @@ -27,3 +27,15 @@ GitHub Documentation ['GitHub Documentation','GitHub'] Documentation ['Documentation'] +['foo','(?i)foo','(?-i)foo'] +['(?i)foo'] +['hello.*world','(?i)hello.*world','(?-i)hello.*world'] +[] +['(?i)hello.*world'] +[] +['foo','(?i)foo','(?-i)foo'] +['foo','(?i)foo'] +['hello.*world','(?i)hello.*world','(?-i)hello.*world'] +['hello.*world','(?i)hello.*world','(?-i)hello.*world'] +['hello.*world','(?i)hello.*world'] +['hello.*world','(?i)hello.*world'] diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh index 5e8985406ae..34ea8b9bfbe 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh @@ -239,10 +239,66 @@ select dictGet('regexp_dict3', 'tag', '/docs'); select dictGetAll('regexp_dict3', 'tag', '/docs'); " +# Test case-insensitive and dot-all match modes +cat > "$yaml" < +-- Daniel J. 
Bernstein +-- Released under CC0 +-- https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/vectors.h#L645 + +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + '')); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21))); +select 
hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))); +select 
hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62))); + +-- CH tests +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128Reference(char(0)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128Reference(char(0, 1)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128Reference(char(0, 1, 2)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128Reference(char(0, 1, 2, 3)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128Reference(char(0, 1, 2, 3, 4)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); +select 
sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128Reference(char(0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); + +select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } + SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; +SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; + +SELECT 'Check bug with hashing of const integer values'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; +INSERT INTO tab VALUES ((2, 2), 4); +-- these two statements must produce the same result +SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM tab; +SELECT hex(sipHash128ReferenceKeyed(key, 4::UInt64)) FROM tab; +DROP TABLE tab; + +SELECT 'Check memsan bug'; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1); + +SELECT 'Check const columns'; +DROP TABLE IF EXISTS sipHashKeyed_test; +CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), 1, 'test')); +SELECT hex(sipHash128Reference(tuple(*))) FROM sipHashKeyed_test; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; +DROP TABLE sipHashKeyed_test; + +SELECT 'Check multiple keys as tuple from a table'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); +INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); +SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM sipHashKeyed_keys ORDER by key; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check multiple keys as separate ints from a table'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); +INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); +SELECT hex(sipHash128ReferenceKeyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; +SELECT 'Check constant key and data from a table'; +SELECT hex(sipHash128ReferenceKeyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check multiple keys as separate ints from a table with constant data'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; +INSERT 
INTO sipHashKeyed_keys VALUES (2, 2); +INSERT INTO sipHashKeyed_keys VALUES (4, 4); +SELECT hex(sipHash128ReferenceKeyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0; +DROP TABLE sipHashKeyed_keys; diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 440f668c614..794fe5944cd 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -40,7 +40,7 @@ ExpressionTransform × 2 (Expression) ExpressionTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 ---Result--- 2023-01-05 all @@ -59,4 +59,4 @@ ExpressionTransform × 2 (Filter) FilterTransform (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 diff --git a/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.reference b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.reference new file mode 100644 index 00000000000..efc3cd99060 --- /dev/null +++ b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.reference @@ -0,0 +1,6 @@ +2 +SELECT countDistinctIf(number % 10, (number % 5) = 2) +FROM numbers(1000) +2 +SELECT uniqExactIf(number % 10, (number % 5) = 2) +FROM numbers(1000) diff --git a/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.sql b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.sql new file mode 100644 index 00000000000..a81f53c1ef6 --- /dev/null +++ b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); + +-- disable by default +SET rewrite_count_distinct_if_with_count_distinct_implementation = 1; +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); diff --git a/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql index 9b117773b9b..93a47c6736a 100644 --- a/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql +++ b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql @@ -1,5 +1,5 @@ drop table if exists test_table; -CREATE TABLE test_table (string_value String) ENGINE = MergeTree ORDER BY string_value; +CREATE TABLE test_table (string_value String) ENGINE = MergeTree ORDER BY string_value SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges test_table; insert into test_table select * from ( select 'test_value_1' diff --git a/tests/queries/0_stateless/02561_temporary_table_grants.sh b/tests/queries/0_stateless/02561_temporary_table_grants.sh index 6e0c96786e8..33784f1d536 100755 --- a/tests/queries/0_stateless/02561_temporary_table_grants.sh +++ b/tests/queries/0_stateless/02561_temporary_table_grants.sh @@ -10,24 +10,24 @@ user=user_$CLICKHOUSE_TEST_UNIQUE_NAME $CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS $user" 
$CLICKHOUSE_CLIENT --query "CREATE USER $user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant CREATE TEMPORARY TABLE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant CREATE TEMPORARY TABLE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT CREATE TEMPORARY TABLE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant CREATE ARBITRARY TEMPORARY TABLE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant CREATE ARBITRARY TEMPORARY TABLE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT CREATE ARBITRARY TEMPORARY TABLE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant FILE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant FILE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT FILE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant URL" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. 
To execute this query, it's necessary to have the grant URL" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO $user" diff --git a/tests/queries/0_stateless/02565_update_empty_nested.sql b/tests/queries/0_stateless/02565_update_empty_nested.sql index ca1c1f5f36e..33316847601 100644 --- a/tests/queries/0_stateless/02565_update_empty_nested.sql +++ b/tests/queries/0_stateless/02565_update_empty_nested.sql @@ -7,7 +7,7 @@ CREATE TABLE t_update_empty_nested ) ENGINE = MergeTree ORDER BY id -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; SET mutations_sync = 2; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index eeba62c5dc8..22dfaf93781 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -4,6 +4,7 @@ insert into buffer_02572 values (1); select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait +SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; select * from data_02572; 1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql index dc229412b13..939c189c5fe 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql @@ -22,6 +22,7 @@ insert into buffer_02572 values (1); select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait +SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; select * from data_02572; select * from copy_02572; diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh index 89b5147f026..d00026d516a 100755 --- a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -5,6 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +set -o pipefail + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql index 7b52a89b16f..eff9e0fa825 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS 02581_trips; -CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id; +CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- Make multiple parts INSERT INTO 02581_trips SELECT number, number, '' FROM numbers(10000); diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.reference b/tests/queries/0_stateless/02582_async_reading_with_small_limit.reference index a7f994d4b13..770e0046c27 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.reference +++ b/tests/queries/0_stateless/02582_async_reading_with_small_limit.reference @@ -4,4 +4,4 @@ ExpressionTransform Limit (ReadFromMergeTree) Concat 3 → 1 - MergeTreeInOrder × 3 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 3 0 → 1 diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql index 6ea6f880712..65b51014258 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql +++ b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql @@ -1,3 +1,5 @@ +drop table if exists t; + create table t(a UInt64) engine=MergeTree order by tuple(); system stop merges t; diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.reference b/tests/queries/0_stateless/02596_build_set_and_remote.reference index 8d12196ae33..b506eaf574f 100644 --- a/tests/queries/0_stateless/02596_build_set_and_remote.reference +++ b/tests/queries/0_stateless/02596_build_set_and_remote.reference @@ -4,16 +4,32 @@ SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); 1 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}'); +1 +1 SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY NULL; +1 SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 1; +1 SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM 
remote('127.0.0.{2,3}') GROUP BY 'A'; +1 SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; 1 +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}') GROUP BY dummy; +1 SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; 1000.0001 \N 0 1000.0001 257 0 1000.0001 65536 0 1000.0001 \N 0 +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}') GROUP BY NULL, NULL, NULL, NULL; +1000.0001 \N 0 +1000.0001 257 0 +1000.0001 65536 0 +1000.0001 \N 0 diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.sql b/tests/queries/0_stateless/02596_build_set_and_remote.sql index 7a904344c91..4785446c1ab 100644 --- a/tests/queries/0_stateless/02596_build_set_and_remote.sql +++ b/tests/queries/0_stateless/02596_build_set_and_remote.sql @@ -2,13 +2,19 @@ SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}'); SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY NULL; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 1; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 'A'; SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}') GROUP BY dummy; SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}') GROUP BY NULL, NULL, NULL, NULL; diff --git a/tests/queries/0_stateless/02661_read_from_archive.lib b/tests/queries/0_stateless/02661_read_from_archive.lib new file mode 100644 index 00000000000..908b6bd38d2 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive.lib @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +function read_archive_file() { + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "DESC file('${user_files_path}/$1')" + $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')" + $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table" +} + +function run_archive_test() { + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table" + + extension_without_dot=$(echo $1 | sed -e 's/\.//g') + FILE_PREFIX="02661_read_from_archive_${CLICKHOUSE_DATABASE}_$extension_without_dot" + + user_files_path=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -o "/[^[:space:]]*nonexist.txt" | awk '{gsub("/nonexist.txt","",$1); print $1}') + + touch ${FILE_PREFIX}_data0.csv + echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv + echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv + echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv + + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" + + echo "archive1 data1.csv" + read_archive_file "${FILE_PREFIX}_archive1.$1 :: ${FILE_PREFIX}_data1.csv" + echo "archive{1..2} data1.csv" + read_archive_file "${FILE_PREFIX}_archive{1..2}.$1 :: ${FILE_PREFIX}_data1.csv" + echo "archive{1,2} data{1,3}.csv" + read_archive_file "${FILE_PREFIX}_archive{1,2}.$1 :: ${FILE_PREFIX}_data{1,3}.csv" + echo "archive3 data*.csv" + read_archive_file "${FILE_PREFIX}_archive3.$1 :: ${FILE_PREFIX}_data*.csv" + echo "archive* *.csv" + read_archive_file "${FILE_PREFIX}_archive*.$1 :: *.csv" + echo "archive* {2..3}.csv" + read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv" + + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + + rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1 + + rm ${FILE_PREFIX}_data{0..3}.csv +} + +# vi: ft=bash diff --git a/tests/queries/0_stateless/02661_read_from_archive_7z.reference b/tests/queries/0_stateless/02661_read_from_archive_7z.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_7z.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 
+3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_7z.sh b/tests/queries/0_stateless/02661_read_from_archive_7z.sh new file mode 100755 index 00000000000..62137c4bf77 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_7z.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "7z" "7z a" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tar.reference b/tests/queries/0_stateless/02661_read_from_archive_tar.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tar.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tar.sh b/tests/queries/0_stateless/02661_read_from_archive_tar.sh new file mode 100755 index 00000000000..7e7db389235 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tar.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. 
"$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar" "tar -cvf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.sh b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.sh new file mode 100755 index 00000000000..4c3763629f4 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. 
"$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar.bz2" "tar -cjf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_targz.reference b/tests/queries/0_stateless/02661_read_from_archive_targz.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_targz.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_targz.sh b/tests/queries/0_stateless/02661_read_from_archive_targz.sh new file mode 100755 index 00000000000..4505a150237 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_targz.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. 
"$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar.gz" "tar -cvzf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference b/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarxz.sh b/tests/queries/0_stateless/02661_read_from_archive_tarxz.sh new file mode 100755 index 00000000000..b8ee5bc46d2 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarxz.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. 
"$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar.xz" "tar -cJf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tzst.reference b/tests/queries/0_stateless/02661_read_from_archive_tzst.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tzst.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tzst.sh b/tests/queries/0_stateless/02661_read_from_archive_tzst.sh new file mode 100755 index 00000000000..b4145e0d1d0 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tzst.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. 
"$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tzst" "tar -caf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_zip.reference b/tests/queries/0_stateless/02661_read_from_archive_zip.reference new file mode 100644 index 00000000000..7d406d45716 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_zip.reference @@ -0,0 +1,128 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_zip.sh b/tests/queries/0_stateless/02661_read_from_archive_zip.sh new file mode 100755 index 00000000000..1234464f0a6 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_zip.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. 
"$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "zip" "zip" \ No newline at end of file diff --git a/tests/queries/0_stateless/02668_parse_datetime.reference b/tests/queries/0_stateless/02668_parse_datetime.reference index b7215ac3718..f6c53ce1887 100644 --- a/tests/queries/0_stateless/02668_parse_datetime.reference +++ b/tests/queries/0_stateless/02668_parse_datetime.reference @@ -241,3 +241,5 @@ select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS -- Error handling select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- Fuzzer crash bug #53715 +select parseDateTime('', '', toString(number)) from numbers(13); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02668_parse_datetime.sql b/tests/queries/0_stateless/02668_parse_datetime.sql index 3fb4aacedbd..d8f2a94e188 100644 --- a/tests/queries/0_stateless/02668_parse_datetime.sql +++ b/tests/queries/0_stateless/02668_parse_datetime.sql @@ -165,4 +165,7 @@ select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- Fuzzer crash bug #53715 +select parseDateTime('', '', toString(number)) from numbers(13); -- { serverError ILLEGAL_COLUMN } + -- { echoOff } diff --git a/tests/queries/0_stateless/02668_ulid_decoding.sql b/tests/queries/0_stateless/02668_ulid_decoding.sql index df94025b7b5..ecab5004df6 100644 --- a/tests/queries/0_stateless/02668_ulid_decoding.sql +++ b/tests/queries/0_stateless/02668_ulid_decoding.sql @@ -4,7 +4,7 @@ SELECT dateDiff('minute', ULIDStringToDateTime(generateULID()), now()) = 0; SELECT toTimezone(ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E'), 'America/Costa_Rica'); SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Rica'); SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9', 'America/Costa_Rica'); -- { serverError ILLEGAL_COLUMN } -SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Ric'); -- { serverError POCO_EXCEPTION } +SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Ric'); -- { serverError BAD_ARGUMENTS } SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E0'); -- { serverError ILLEGAL_COLUMN } SELECT ULIDStringToDateTime(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT ULIDStringToDateTime(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference index 016202cfb66..bb9cfc5efca 100644 --- a/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference +++ b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference @@ -1 +1 @@ -MergeTreeInOrder +MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 diff --git a/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql index f00c1322e1d..a280a384c40 100644 --- a/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql +++ b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql @@ -5,4 +5,4 @@ set allow_experimental_analyzer=1; 
create table t (a UInt64, b UInt64) engine=MergeTree() order by (a); insert into t select number % 2, number from numbers(10); -select splitByChar(' ', trimBoth(explain))[1] from (explain pipeline select distinct a from t) where explain like '%MergeTreeInOrder%'; +select trimBoth(explain) from (explain pipeline select distinct a from t) where explain like '%InOrder%'; diff --git a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh index 2202a349c56..c1f28f9f079 100755 --- a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh @@ -61,7 +61,7 @@ INSERT INTO test_replicated SELECT 3;" wait_for_number_of_parts 'test_replicated' 1 100 $CLICKHOUSE_CLIENT -nmq " -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; DROP TABLE test_replicated;" diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference new file mode 100644 index 00000000000..4c27ee5b528 --- /dev/null +++ b/tests/queries/0_stateless/02676_to_decimal_string.reference @@ -0,0 +1,21 @@ +2.00000000000000000000000000000000000000000000000000000000000000000000000000000 +2.12 +-2.00000000000000000000000000000000000000000000000000000000000000000000000000000 +-2.12 +2.987600000000000033395508580724708735942840576171875000000000 +2.15 +-2.987600000000000033395508580724708735942840576171875000000000 +-2.15 +64.1230010986 +64.2340000000 +-64.1230010986 +-64.2340000000 +-32.345 +32.34500000000000000000000000000000000000000000000000000000000000000000000000000 +32.46 +-64.5671232345 +128.78932312332132985464 +-128.78932312332132985464 +128.78932312332132985464000000000000000000000000000000000000000000000000000000000 +128.7893231233 +-128.78932312332132985464123123789323123321329854600000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql new file mode 100644 index 00000000000..1dae139deb1 --- /dev/null +++ b/tests/queries/0_stateless/02676_to_decimal_string.sql @@ -0,0 +1,41 @@ +-- Regular types +SELECT toDecimalString(2, 77); -- more digits required than exist +SELECT toDecimalString(2.123456, 2); -- rounding +SELECT toDecimalString(-2, 77); -- more digits required than exist +SELECT toDecimalString(-2.123456, 2); -- rounding + +SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default) +SELECT toDecimalString(2.1456, 2); -- rounding +SELECT toDecimalString(-2.9876, 60); -- more digits required than exist +SELECT toDecimalString(-2.1456, 2); -- rounding + +-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway. 
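+-- Per the reference output the second argument is the number of fractional digits kept after rounding:
+-- 64.234::Float64 with 10 digits prints 64.2340000000, while 64.123::Float32 already carries float noise (64.1230010986).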
+SELECT toDecimalString(64.123::Float32, 10); +SELECT toDecimalString(64.234::Float64, 10); +SELECT toDecimalString(-64.123::Float32, 10); +SELECT toDecimalString(-64.234::Float64, 10); + +-- Decimals +SELECT toDecimalString(-32.345::Decimal32(3), 3); +SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist +SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding +SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10); +SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20); +SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding +SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist +SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding +SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist + +-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float. +-- These values shall work OK. +SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} + +-- wrong types: #52407 and similar +SELECT toDecimalString('256.256'::Decimal256(45), *); -- {serverError ILLEGAL_COLUMN} +SELECT toDecimalString('128.128'::Decimal128(30), 'str'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT toDecimalString('64.64'::Decimal64(10)); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT toDecimalString('64.64'::Decimal64(10), 3, 3); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} diff --git a/tests/queries/0_stateless/02681_undrop_query.reference b/tests/queries/0_stateless/02681_undrop_query.reference deleted file mode 100644 index 5df6c392eb9..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query.reference +++ /dev/null @@ -1,32 +0,0 @@ -test MergeTree undrop -02681_undrop_mergetree -1 -2 -3 -test detach -UPDATE num = 2 WHERE id = 1 -test MergeTree with cluster -02681_undrop_uuid_on_cluster -1 -2 -3 -test MergeTree without uuid on cluster -02681_undrop_no_uuid_on_cluster -1 -2 -3 -test ReplicatedMergeTree undrop -02681_undrop_replicatedmergetree -1 -2 -3 -test Log undrop -02681_undrop_log -1 -2 -3 -test Distributed undrop -02681_undrop_distributed -test MergeTree drop and undrop multiple times -02681_undrop_multiple -3 diff --git a/tests/queries/0_stateless/02681_undrop_query.sql b/tests/queries/0_stateless/02681_undrop_query.sql deleted file mode 100644 index 39ca1548d53..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query.sql +++ /dev/null @@ -1,90 +0,0 @@ --- Tags: no-ordinary-database, no-replicated-database, distributed, zookeeper - -set database_atomic_wait_for_drop_and_detach_synchronously = 0; -set allow_experimental_undrop_table_query = 1; - -select 'test MergeTree undrop'; -drop table if exists 02681_undrop_mergetree sync; -create table 02681_undrop_mergetree (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_mergetree values (1),(2),(3); -drop table 
02681_undrop_mergetree; -select table from system.dropped_tables where table = '02681_undrop_mergetree' limit 1; -undrop table 02681_undrop_mergetree; -select * from 02681_undrop_mergetree order by id; -drop table 02681_undrop_mergetree sync; - -select 'test detach'; -drop table if exists 02681_undrop_detach sync; -create table 02681_undrop_detach (id Int32, num Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_detach values (1, 1); -detach table 02681_undrop_detach; -undrop table 02681_undrop_detach; -- { serverError 57 } -attach table 02681_undrop_detach; -alter table 02681_undrop_detach update num = 2 where id = 1; -select command from system.mutations where table='02681_undrop_detach' and database=currentDatabase() limit 1; -drop table 02681_undrop_detach sync; - -select 'test MergeTree with cluster'; -drop table if exists 02681_undrop_uuid_on_cluster on cluster test_shard_localhost sync format Null; -create table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id format Null; -insert into 02681_undrop_uuid_on_cluster values (1),(2),(3); -drop table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost format Null; -select table from system.dropped_tables where table = '02681_undrop_uuid_on_cluster' limit 1; -undrop table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost format Null; -select * from 02681_undrop_uuid_on_cluster order by id; -drop table 02681_undrop_uuid_on_cluster sync; - -select 'test MergeTree without uuid on cluster'; -drop table if exists 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost sync format Null; -create table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id format Null; -insert into 02681_undrop_no_uuid_on_cluster values (1),(2),(3); -drop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost format Null; -select table from system.dropped_tables where table = '02681_undrop_no_uuid_on_cluster' limit 1; -undrop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost format Null; -select * from 02681_undrop_no_uuid_on_cluster order by id; -drop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost sync format Null; - -select 'test ReplicatedMergeTree undrop'; -drop table if exists 02681_undrop_replicatedmergetree sync; -create table 02681_undrop_replicatedmergetree (id Int32) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/02681_undrop_replicatedmergetree', 'test_undrop') order by id; -insert into 02681_undrop_replicatedmergetree values (1),(2),(3); -drop table 02681_undrop_replicatedmergetree; -select table from system.dropped_tables where table = '02681_undrop_replicatedmergetree' limit 1; -undrop table 02681_undrop_replicatedmergetree; -select * from 02681_undrop_replicatedmergetree order by id; -drop table 02681_undrop_replicatedmergetree sync; - -select 'test Log undrop'; -drop table if exists 02681_undrop_log sync; -create table 02681_undrop_log (id Int32) Engine=Log(); -insert into 02681_undrop_log values (1),(2),(3); -drop table 02681_undrop_log; -select table from system.dropped_tables where table = '02681_undrop_log' limit 1; -undrop table 02681_undrop_log; -select * from 02681_undrop_log order by id; -drop table 02681_undrop_log sync; - -select 'test Distributed undrop'; -drop table if exists 02681_undrop_distributed sync; -create table 02681_undrop_distributed (id Int32) Engine = Distributed(test_shard_localhost, currentDatabase(), 
02681_undrop, rand()); -drop table 02681_undrop_distributed; -select table from system.dropped_tables where table = '02681_undrop_distributed' limit 1; -undrop table 02681_undrop_distributed; -drop table 02681_undrop_distributed sync; - -select 'test MergeTree drop and undrop multiple times'; -drop table if exists 02681_undrop_multiple sync; -create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_multiple values (1); -drop table 02681_undrop_multiple; -create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_multiple values (2); -drop table 02681_undrop_multiple; -create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_multiple values (3); -drop table 02681_undrop_multiple; -select table from system.dropped_tables where table = '02681_undrop_multiple' limit 1; -undrop table 02681_undrop_multiple; -select * from 02681_undrop_multiple order by id; -undrop table 02681_undrop_multiple; -- { serverError 57 } -drop table 02681_undrop_multiple sync; diff --git a/tests/queries/0_stateless/02681_undrop_query_uuid.reference b/tests/queries/0_stateless/02681_undrop_query_uuid.reference deleted file mode 100644 index beae016401b..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query_uuid.reference +++ /dev/null @@ -1,6 +0,0 @@ -test MergeTree with uuid -02681_undrop_uuid -OK -1 -2 -3 diff --git a/tests/queries/0_stateless/02681_undrop_query_uuid.sh b/tests/queries/0_stateless/02681_undrop_query_uuid.sh deleted file mode 100755 index a93f30ef459..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query_uuid.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ordinary-database, no-replicated-database - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -echo 'test MergeTree with uuid' -${CLICKHOUSE_CLIENT} -q "drop table if exists 02681_undrop_uuid sync;" -uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT generateUUIDv4()") -uuid2=$(${CLICKHOUSE_CLIENT} --query "SELECT generateUUIDv4()") -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "create table 02681_undrop_uuid UUID '$uuid' on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id;" -${CLICKHOUSE_CLIENT} -q "insert into 02681_undrop_uuid values (1),(2),(3);" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "drop table 02681_undrop_uuid on cluster test_shard_localhost settings database_atomic_wait_for_drop_and_detach_synchronously = 0;" -${CLICKHOUSE_CLIENT} -q "select table from system.dropped_tables where table = '02681_undrop_uuid' limit 1;" -${CLICKHOUSE_CLIENT} -q "undrop table 02681_undrop_uuid UUID '$uuid2' settings allow_experimental_undrop_table_query = 1;" 2>&1| grep -Faq "UNKNOWN_TABLE" && echo OK -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "undrop table 02681_undrop_uuid UUID '$uuid' on cluster test_shard_localhost settings allow_experimental_undrop_table_query = 1;" -${CLICKHOUSE_CLIENT} -q "select * from 02681_undrop_uuid order by id;" -${CLICKHOUSE_CLIENT} -q "drop table 02681_undrop_uuid sync;" diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql new file mode 100644 index 00000000000..b242bdc72ef --- /dev/null +++ b/tests/queries/0_stateless/02701_non_parametric_function.sql @@ -0,0 +1 @@ +SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index b5d32d2059d..748bf856deb 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -14,7 +14,7 @@ $CLICKHOUSE_CLIENT -nm -q " $CLICKHOUSE_CLIENT -q "insert into data select * from numbers(1e6)" query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to Disk('default', 'backups/$CLICKHOUSE_DATABASE/data/backup1')" --max_backup_bandwidth=1M > /dev/null +$CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to Disk('backups', '$CLICKHOUSE_DATABASE/data/backup1')" --max_backup_bandwidth=1M > /dev/null $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference index ccc02ad4f34..60464757bdd 100644 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference @@ -5,5 +5,11 @@ implicit: 4 Test 2: check Filesystem database 4 +30 +10 +4 +3 +2 +1 Test 3: check show database with Filesystem test02707 diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh index 7c9095b3d8b..dabea8a373a 100755 --- 
a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh @@ -15,6 +15,23 @@ echo '2,"def",456,"bacabaa"' >> $dir/tmp.csv echo '3,"story",78912,"acabaab"' >> $dir/tmp.csv echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_1.csv') select * from numbers(1, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_2.csv') select * from numbers(11, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_30.csv') select * from numbers(21, 10)" + +readonly nested_dir=$dir/nested +[[ -d $nested_dir ]] && rm -rd $nested_dir +mkdir $nested_dir +mkdir $nested_dir/subnested + +cp ${dir}/tmp_numbers_1.csv ${nested_dir}/nested_tmp_numbers_1.csv +cp ${dir}/tmp_numbers_1.csv ${nested_dir}/subnested/subnested_tmp_numbers_1.csv + +readonly other_nested_dir=$dir/other_nested +[[ -d $other_nested_dir ]] && rm -rd $other_nested_dir +mkdir $other_nested_dir +cp ${dir}/tmp_numbers_1.csv ${other_nested_dir}/tmp_numbers_1.csv + ################# echo "Test 1: check explicit and implicit call of the file table function" @@ -29,6 +46,12 @@ $CLICKHOUSE_LOCAL --multiline --multiquery -q """ DROP DATABASE IF EXISTS test; CREATE DATABASE test ENGINE = Filesystem('${dir}'); SELECT COUNT(*) FROM test.\`tmp.csv\`; +SELECT COUNT(*) FROM test.\`tmp_numbers_*.csv\`; +SELECT COUNT(*) FROM test.\`nested/nested_tmp_numbers_1*.csv\`; +SELECT count(DISTINCT _path) FROM test.\`*.csv\`; +SELECT count(DISTINCT _path) FROM test.\`**/*.csv\`; +SELECT count(DISTINCT _path) FROM test.\`**/*.csv\` WHERE position(_path, '${nested_dir}') > 0; +SELECT count(DISTINCT _path) FROM test.\`**/*.csv\` WHERE position(_path, '${nested_dir}') = 0; DROP DATABASE test; """ diff --git a/tests/queries/0_stateless/02714_local_object_storage.sql b/tests/queries/0_stateless/02714_local_object_storage.sql index fa9025b8b6e..914eac1ddba 100644 --- a/tests/queries/0_stateless/02714_local_object_storage.sql +++ b/tests/queries/0_stateless/02714_local_object_storage.sql @@ -7,7 +7,7 @@ CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS disk = disk( type = 'local_blob_storage', - path = '/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/'); + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}/'); INSERT INTO test SELECT 1, 'test'; SELECT * FROM test; @@ -19,7 +19,7 @@ ENGINE = MergeTree() ORDER BY tuple() SETTINGS disk = disk( type = 'cache', max_size = '10Mi', - path = '/var/lib/clickhouse/caches/${CLICKHOUSE_TEST_UNIQUE_NAME}/', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}/', disk = disk(type='local_blob_storage', path='/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/')); INSERT INTO test SELECT 1, 'test'; diff --git a/tests/queries/0_stateless/02722_database_filesystem.reference b/tests/queries/0_stateless/02722_database_filesystem.reference index c65dda7933a..17f84cfc49c 100644 --- a/tests/queries/0_stateless/02722_database_filesystem.reference +++ b/tests/queries/0_stateless/02722_database_filesystem.reference @@ -3,6 +3,14 @@ Test 1: create filesystem database and check implicit calls test1 4 4 +30 +10 +10 +4 +0 +2 +0 +OK 4 Test 2: check DatabaseFilesystem access rights and errors handling on server OK @@ -13,3 +21,6 @@ OK OK OK OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 3b7a41bb39e..c21b1af2ff4 100755 --- 
a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -19,11 +19,17 @@ echo '3,"story",78912,"acabaab"' >> ${user_files_tmp_dir}/tmp.csv echo '4,"history",21321321,"cabaaba"' >> ${user_files_tmp_dir}/tmp.csv tmp_dir=${CLICKHOUSE_TEST_UNIQUE_NAME} + +$CLICKHOUSE_LOCAL -q "insert into function file('$user_files_tmp_dir/tmp_numbers_1.csv') select * from numbers(1, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$user_files_tmp_dir/tmp_numbers_2.csv') select * from numbers(11, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$user_files_tmp_dir/tmp_numbers_30.csv') select * from numbers(21, 10)" + [[ -d $tmp_dir ]] && rm -rd $tmp_dir mkdir $tmp_dir cp ${user_files_tmp_dir}/tmp.csv ${tmp_dir}/tmp.csv cp ${user_files_tmp_dir}/tmp.csv ${user_files_tmp_dir}/tmp/tmp.csv cp ${user_files_tmp_dir}/tmp.csv ${user_files_tmp_dir}/tmp.myext +cp ${user_files_tmp_dir}/tmp_numbers_1.csv ${user_files_tmp_dir}/tmp/tmp_numbers_1.csv ################# echo "Test 1: create filesystem database and check implicit calls" @@ -35,37 +41,48 @@ echo $? ${CLICKHOUSE_CLIENT} --query "SHOW DATABASES" | grep "test1" ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.csv\`;" ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp/tmp.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp_numbers_*.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp/*tmp_numbers_*.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/*/*tmp_numbers_*.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/*.csv\` WHERE startsWith(_path, '${user_files_tmp_dir}')"; +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/*.csv\` WHERE not startsWith(_path, '${user_files_tmp_dir}')"; +# **/* does not search in the current directory but searches recursively in nested directories. 
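+# In this test's layout, `${unique_name}/*.csv` therefore sees the four csv files created at the top level,
+# while `${unique_name}/**/*.csv` only sees the two csv files copied into the tmp/ subdirectory (4 and 2 in the reference).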
+${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/**/*.csv\` WHERE startsWith(_path, '${user_files_tmp_dir}')"; +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/**/*.csv\` WHERE not startsWith(_path, '${user_files_tmp_dir}')"; +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" ################# echo "Test 2: check DatabaseFilesystem access rights and errors handling on server" # DATABASE_ACCESS_DENIED: Allows list files only inside user_files -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../*/tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery --query """ USE test1; SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\"; -""" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: path should be inside user_files ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('/tmp'); -""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exists ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); -""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # FILE_DOESNT_EXIST: unknown file -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 60" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM 
test1.\`tmp2.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "FILE_DOESNT_EXIST" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: Cannot determine the file format by it's extension -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: - +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Clean ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" rm -rd $tmp_dir diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index bb8f1f5f7ee..13b627c0342 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference index ef8adae2bbc..dfc5b63647d 100644 --- a/tests/queries/0_stateless/02725_database_hdfs.reference +++ b/tests/queries/0_stateless/02725_database_hdfs.reference @@ -4,9 +4,8 @@ test1 1 2 3 test2 Test 2: check exceptions -OK0 -OK1 -OK2 -OK3 -OK4 -OK5 +BAD_ARGUMENTS +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 89ff7421a6f..f6089cfa18a 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash # Tags: no-fasttest, use-hdfs, no-parallel +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh @@ -36,19 +38,20 @@ echo "Test 2: check exceptions" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = HDFS('abacaba'); -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK0" +""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" +""" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK3" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK4" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK5" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Cleanup diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.sh b/tests/queries/0_stateless/02725_parquet_preserve_order.sh index ea3e4219e35..94f2eaaa753 100755 --- a/tests/queries/0_stateless/02725_parquet_preserve_order.sh +++ b/tests/queries/0_stateless/02725_parquet_preserve_order.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # It'll be read into two blocks. The first block will sleep 2x longer than the second. # So reordering is very likely if the order-preservation doesn't work. 
-$CLICKHOUSE_LOCAL -q "select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1" +$CLICKHOUSE_LOCAL -q "select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, function_sleep_max_microseconds_per_block = 6000000, input_format_parquet_local_file_min_bytes_for_seek=0" -$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" -$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2, input_format_parquet_local_file_min_bytes_for_seek=0" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2, input_format_parquet_local_file_min_bytes_for_seek=0" diff --git a/tests/queries/0_stateless/02725_sleep_max_time.reference b/tests/queries/0_stateless/02725_sleep_max_time.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_sleep_max_time.sql b/tests/queries/0_stateless/02725_sleep_max_time.sql new file mode 100644 index 00000000000..b8378aee17e --- /dev/null +++ b/tests/queries/0_stateless/02725_sleep_max_time.sql @@ -0,0 +1 @@ +SELECT * FROM system.numbers WHERE sleepEachRow(0.05) LIMIT 10; -- { serverError TOO_SLOW } diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.sh b/tests/queries/0_stateless/02725_start_stop_fetches.sh index 0ca687ae951..c9922455d94 100755 --- a/tests/queries/0_stateless/02725_start_stop_fetches.sh +++ b/tests/queries/0_stateless/02725_start_stop_fetches.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database +# Tags: race, zookeeper, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.reference b/tests/queries/0_stateless/02726_async_insert_flush_queue.reference new file mode 100644 index 00000000000..b94888d227e --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.reference @@ -0,0 +1,5 @@ +JSONEachRow 3 +Values 2 +0 +0 +9 diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql new file mode 100644 index 00000000000..98e78045b85 --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -0,0 +1,30 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS t_async_inserts_flush; + +CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; + +SET async_insert = 1; +SET wait_for_async_insert = 0; +SET async_insert_busy_timeout_ms = 1000000; + +INSERT INTO t_async_inserts_flush VALUES (1) (2); +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 10} {"a": 20}; +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": "str"} +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 100} {"a": 
200} +INSERT INTO t_async_inserts_flush VALUES (3) (4) (5); + +SELECT sleep(1) FORMAT Null; + +SELECT format, length(entries.query_id) FROM system.asynchronous_inserts +WHERE database = currentDatabase() AND table = 't_async_inserts_flush' +ORDER BY format; + +SELECT count() FROM t_async_inserts_flush; + +SYSTEM FLUSH ASYNC INSERT QUEUE; + +SELECT count() FROM system.asynchronous_inserts; +SELECT count() FROM t_async_inserts_flush; + +DROP TABLE t_async_inserts_flush; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.reference b/tests/queries/0_stateless/02726_async_insert_flush_stress.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh new file mode 100755 index 00000000000..876766d0780 --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# Tags: long, no-parallel + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +function insert1() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV +1,"a" +2,"b" +' + done +} + +function insert2() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' + done +} + +function insert3() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" + done +} + +function select1() +{ + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null" + done +} + +function select2() +{ + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null" + done +} + +function flush1() +{ + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + sleep 0.2 + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" + done +} + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" + +TIMEOUT=10 + +export -f insert1 +export -f insert2 +export -f insert3 +export -f select1 +export -f select2 +export -f flush1 + +for _ in {1..5}; do + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & +done + +select1 $TIMEOUT & +select2 $TIMEOUT & +flush1 $TIMEOUT & + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"; diff --git a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql b/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql deleted file mode 
100644 index 244ba4e959a..00000000000 --- a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql +++ /dev/null @@ -1,2 +0,0 @@ -SET allow_experimental_query_cache = 0; -SET allow_experimental_query_cache = 1; diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference new file mode 100644 index 00000000000..7616c59e4fd --- /dev/null +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference @@ -0,0 +1,5 @@ +dict_flat_simple Flat +dict_hashed_simple_Decimal128 Hashed +dict_hashed_simple_Float32 Hashed +dict_hashed_simple_String ComplexKeyHashed +dict_hashed_simple_auto_convert ComplexKeyHashed diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql new file mode 100644 index 00000000000..753b9f663b5 --- /dev/null +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql @@ -0,0 +1,35 @@ +DROP DICTIONARY IF EXISTS dict_flat_simple; +DROP DICTIONARY IF EXISTS dict_hashed_simple_Decimal128; +DROP DICTIONARY IF EXISTS dict_hashed_simple_Float32; +DROP DICTIONARY IF EXISTS dict_hashed_simple_String; +DROP DICTIONARY IF EXISTS dict_hashed_simple_auto_convert; +DROP TABLE IF EXISTS dict_data; + +CREATE TABLE dict_data (v0 UInt16, v1 Int16, v2 Float32, v3 Decimal128(10), v4 String) engine=Memory() AS SELECT number, number%65535, number*1.1, number*1.1, 'foo' FROM numbers(10);; + +CREATE DICTIONARY dict_flat_simple (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(flat()); +SYSTEM RELOAD DICTIONARY dict_flat_simple; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_flat_simple'; +DROP DICTIONARY dict_flat_simple; + +CREATE DICTIONARY dict_hashed_simple_Decimal128 (v3 Decimal128(10), v1 UInt16, v2 Float32) PRIMARY KEY v3 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_Decimal128; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_Decimal128'; +DROP DICTIONARY dict_hashed_simple_Decimal128; + +CREATE DICTIONARY dict_hashed_simple_Float32 (v2 Float32, v3 Decimal128(10), v4 String) PRIMARY KEY v2 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_Float32; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_Float32'; +DROP DICTIONARY dict_hashed_simple_Float32; + +CREATE DICTIONARY dict_hashed_simple_String (v4 String, v3 Decimal128(10), v2 Float32) PRIMARY KEY v4 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_String; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_String'; +DROP DICTIONARY dict_hashed_simple_String; + +CREATE DICTIONARY dict_hashed_simple_auto_convert (v0 UInt16, v1 Int16, v2 UInt16) PRIMARY KEY v0,v1 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_auto_convert; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND 
name = 'dict_hashed_simple_auto_convert'; +DROP DICTIONARY dict_hashed_simple_auto_convert; + +DROP TABLE dict_data; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference b/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference index 125915f4f65..1898b2c543b 100644 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference @@ -1,3 +1,6 @@ 1 [] 1 [] [] +1 [] +1 [] +[] diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index 690cf977d08..eef52002e36 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -5,23 +5,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " -DROP TABLE IF EXISTS test; -CREATE TABLE test (id Int32, empty Array(Int32)) - ENGINE=MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0, disk='s3_disk'; +for DISK in s3_disk s3_cache +do + ${CLICKHOUSE_CLIENT} -n --query " + DROP TABLE IF EXISTS test; + CREATE TABLE test (id Int32, empty Array(Int32)) + ENGINE=MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0, disk='$DISK'; -INSERT INTO test (id) VALUES (1); -SELECT * FROM test; -" + INSERT INTO test (id) VALUES (1); + SELECT * FROM test; + " -${CLICKHOUSE_CLIENT} -n --query " -BACKUP TABLE test TO Disk('backups', 'test_s3_backup'); -DROP TABLE test; -RESTORE TABLE test FROM Disk('backups', 'test_s3_backup'); -" &>/dev/null + ${CLICKHOUSE_CLIENT} -n --query " + BACKUP TABLE test TO Disk('backups', 'test_s3_backup'); + DROP TABLE test; + RESTORE TABLE test FROM Disk('backups', 'test_s3_backup'); + " &>/dev/null -${CLICKHOUSE_CLIENT} -n --query " -SELECT * FROM test; -SELECT empty FROM test; -" + ${CLICKHOUSE_CLIENT} -n --query " + SELECT * FROM test; + SELECT empty FROM test; + " +done diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference new file mode 100644 index 00000000000..a7ee82bc67f --- /dev/null +++ b/tests/queries/0_stateless/02735_parquet_encoder.reference @@ -0,0 +1,63 @@ +u8 Nullable(UInt8) +u16 Nullable(UInt16) +u32 Nullable(UInt32) +u64 Nullable(UInt64) +i8 Nullable(Int8) +i16 Nullable(Int16) +i32 Nullable(Int32) +i64 Nullable(Int64) +date Nullable(UInt16) +date32 Nullable(Date32) +datetime Nullable(UInt32) +datetime64 Nullable(DateTime64(3, \'UTC\')) +enum8 Nullable(Int8) +enum16 Nullable(Int16) +float32 Nullable(Float32) +float64 Nullable(Float64) +str Nullable(String) +fstr Nullable(FixedString(12)) +u128 Nullable(FixedString(16)) +u256 Nullable(FixedString(32)) +i128 Nullable(FixedString(16)) +i256 Nullable(FixedString(32)) +decimal32 Nullable(Decimal(9, 3)) +decimal64 Nullable(Decimal(18, 10)) +decimal128 Nullable(Decimal(38, 20)) +decimal256 Nullable(Decimal(76, 40)) +ipv4 Nullable(UInt32) +ipv6 Nullable(FixedString(16)) +0 +0 +0 +0 +1 2 1 +1 2 2 +1 3 3 +1 1000000 1 +3914219105369203805 +4 1000000 1 +(1000000,0,NULL,'100','299') +(1000000,0,NULL,'0','-1294970296') +(1000000,0,NULL,'-2147483296','2147481000') +(100000,900000,NULL,'100009','999999') +[(2,0,NULL,'','[]')] +1 1 +0 1 +5090915589685802007 +16159458007063698496 +16159458007063698496 +BYTE_ARRAY String +FIXED_LEN_BYTE_ARRAY None +BYTE_ARRAY None +BYTE_ARRAY 
None +BYTE_ARRAY String +never gonna +give you +up +ms Nullable(DateTime64(3, \'UTC\')) +us Nullable(DateTime64(6, \'UTC\')) +ns Nullable(DateTime64(9, \'UTC\')) +cs Nullable(DateTime64(3, \'UTC\')) +s Nullable(DateTime64(3, \'UTC\')) +dus Nullable(DateTime64(9, \'UTC\')) +12670676506515577395 diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql new file mode 100644 index 00000000000..19125abf8da --- /dev/null +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -0,0 +1,182 @@ +-- Tags: no-fasttest, no-parallel + +set output_format_parquet_use_custom_encoder = 1; +set output_format_parquet_row_group_size = 1000; +set output_format_parquet_data_page_size = 800; +set output_format_parquet_batch_size = 100; +set output_format_parquet_row_group_size_bytes = 1000000000; +set engine_file_truncate_on_insert=1; + +-- Write random data to parquet file, then read from it and check that it matches what we wrote. +-- Do this for all kinds of data types: primitive, Nullable(primitive), Array(primitive), +-- Array(Nullable(primitive)), Array(Array(primitive)), Map(primitive, primitive), etc. + +drop table if exists basic_types_02735; +create temporary table basic_types_02735 as select * from generateRandom(' + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + date Date, + date32 Date32, + datetime DateTime, + datetime64 DateTime64, + enum8 Enum8(''x'' = 1, ''y'' = 2, ''z'' = 3), + enum16 Enum16(''xx'' = 1000, ''yy'' = 2000, ''zz'' = 3000), + float32 Float32, + float64 Float64, + str String, + fstr FixedString(12), + u128 UInt128, + u256 UInt256, + i128 Int128, + i256 Int256, + decimal32 Decimal32(3), + decimal64 Decimal64(10), + decimal128 Decimal128(20), + decimal256 Decimal256(40), + ipv4 IPv4, + ipv6 IPv6') limit 10101; +insert into function file(basic_types_02735.parquet) select * from basic_types_02735; +desc file(basic_types_02735.parquet); +select (select sum(cityHash64(*)) from basic_types_02735) - (select sum(cityHash64(*)) from file(basic_types_02735.parquet)); +drop table basic_types_02735; + + +drop table if exists nullables_02735; +create temporary table nullables_02735 as select * from generateRandom(' + u16 Nullable(UInt16), + i64 Nullable(Int64), + datetime64 Nullable(DateTime64), + enum8 Nullable(Enum8(''x'' = 1, ''y'' = 2, ''z'' = 3)), + float64 Nullable(Float64), + str Nullable(String), + fstr Nullable(FixedString(12)), + i256 Nullable(Int256), + decimal256 Nullable(Decimal256(40)), + ipv6 Nullable(IPv6)') limit 10000; +insert into function file(nullables_02735.parquet) select * from nullables_02735; +select (select sum(cityHash64(*)) from nullables_02735) - (select sum(cityHash64(*)) from file(nullables_02735.parquet)); +drop table nullables_02735; + + +-- TODO: When cityHash64() fully supports Nullable: https://github.com/ClickHouse/ClickHouse/pull/48625 +-- the next two blocks can be simplified: arrays_out_02735 intermediate table is not needed, +-- a.csv and b.csv are not needed. 
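+-- Until then the round trips below are checked indirectly: the array case re-materializes the Parquet read-back
+-- into a table and compares per-table sums of cityHash64(*), and the nested "madness" case compares LineAsString
+-- hashes of two sorted CSV dumps (a.csv from the source table, b.csv from the file() read-back).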
+ +drop table if exists arrays_02735; +drop table if exists arrays_out_02735; +create table arrays_02735 engine = Memory as select * from generateRandom(' + u32 Array(UInt32), + i8 Array(Int8), + datetime Array(DateTime), + enum16 Array(Enum16(''xx'' = 1000, ''yy'' = 2000, ''zz'' = 3000)), + float32 Array(Float32), + str Array(String), + fstr Array(FixedString(12)), + u128 Array(UInt128), + decimal64 Array(Decimal64(10)), + ipv4 Array(IPv4), + msi Map(String, Int16), + tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 10000; +insert into function file(arrays_02735.parquet) select * from arrays_02735; +create temporary table arrays_out_02735 as arrays_02735; +insert into arrays_out_02735 select * from file(arrays_02735.parquet); +select (select sum(cityHash64(*)) from arrays_02735) - (select sum(cityHash64(*)) from arrays_out_02735); +--select (select sum(cityHash64(*)) from arrays_02735) - +-- (select sum(cityHash64(u32, i8, datetime, enum16, float32, str, fstr, arrayMap(x->reinterpret(x, 'UInt128'), u128), decimal64, ipv4, msi, tup)) from file(arrays_02735.parquet)); +drop table arrays_02735; +drop table arrays_out_02735; + + +drop table if exists madness_02735; +create temporary table madness_02735 as select * from generateRandom(' + aa Array(Array(UInt32)), + aaa Array(Array(Array(UInt32))), + an Array(Nullable(String)), + aan Array(Array(Nullable(FixedString(10)))), + l LowCardinality(String), + ln LowCardinality(Nullable(FixedString(11))), + al Array(LowCardinality(UInt128)), + aaln Array(Array(LowCardinality(Nullable(String)))), + mln Map(LowCardinality(String), Nullable(Int8)), + t Tuple(Map(FixedString(5), Tuple(Array(UInt16), Nullable(UInt16), Array(Tuple(Int8, Decimal64(10))))), Tuple(kitchen UInt64, sink String)), + n Nested(hello UInt64, world Tuple(first String, second FixedString(1))) + ') limit 10000; +insert into function file(madness_02735.parquet) select * from madness_02735; +insert into function file(a.csv) select * from madness_02735 order by tuple(*); +insert into function file(b.csv) select aa, aaa, an, aan, l, ln, arrayMap(x->reinterpret(x, 'UInt128'), al) as al_, aaln, mln, t, n.hello, n.world from file(madness_02735.parquet) order by tuple(aa, aaa, an, aan, l, ln, al_, aaln, mln, t, n.hello, n.world); +select (select sum(cityHash64(*)) from file(a.csv, LineAsString)) - (select sum(cityHash64(*)) from file(b.csv, LineAsString)); +--select (select sum(cityHash64(*)) from madness_02735) - +-- (select sum(cityHash64(aa, aaa, an, aan, l, ln, map(x->reinterpret(x, 'UInt128'), al), aaln, mln, t, n.hello, n.world)) from file(madness_02735.parquet)); +drop table madness_02735; + + +-- Merging input blocks into bigger row groups. +insert into function file(squash_02735.parquet) select '012345' union all select '543210' settings max_block_size = 1; +select num_columns, num_rows, num_row_groups from file(squash_02735.parquet, ParquetMetadata); + +-- Row group size limit in bytes. +insert into function file(row_group_bytes_02735.parquet) select '012345' union all select '543210' settings max_block_size = 1, output_format_parquet_row_group_size_bytes = 5; +select num_columns, num_rows, num_row_groups from file(row_group_bytes_02735.parquet, ParquetMetadata); + +-- Row group size limit in rows. 
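+-- With numbers(3) and a row group size of 1, the ParquetMetadata query below should report one column,
+-- three rows and three row groups (the `1 3 3` line in the reference).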
+insert into function file(tiny_row_groups_02735.parquet) select * from numbers(3) settings output_format_parquet_row_group_size = 1; +select num_columns, num_rows, num_row_groups from file(tiny_row_groups_02735.parquet, ParquetMetadata); + +-- 1M unique 8-byte values should exceed dictionary_size_limit (1 MB). +insert into function file(big_column_chunk_02735.parquet) select number from numbers(1000000) settings output_format_parquet_row_group_size = 1000000; +select num_columns, num_rows, num_row_groups from file(big_column_chunk_02735.parquet, ParquetMetadata); +select sum(cityHash64(number)) from file(big_column_chunk_02735.parquet); + +-- Check statistics: signed vs unsigned, null count. Use enough rows to produce multiple pages. +insert into function file(statistics_02735.parquet) select 100 + number%200 as a, toUInt32(number * 3000) as u, toInt32(number * 3000) as i, if(number % 10 == 9, toString(number), null) as s from numbers(1000000) settings output_format_parquet_row_group_size = 1000000; +select num_columns, num_rows, num_row_groups from file(statistics_02735.parquet, ParquetMetadata); +select tupleElement(c, 'statistics') from file(statistics_02735.parquet, ParquetMetadata) array join tupleElement(row_groups[1], 'columns') as c; + +-- Statistics string length limit (max_statistics_size). +insert into function file(long_string_02735.parquet) select toString(range(number * 2000)) from numbers(2); +select tupleElement(tupleElement(row_groups[1], 'columns'), 'statistics') from file(long_string_02735.parquet, ParquetMetadata); + +-- Compression setting. +insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='zstd'; +select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); +insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='none'; +select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); +insert into function file(compressed_02735.parquet) select if(number%3==1, NULL, 42) as x from numbers(70) settings output_format_parquet_compression_method='zstd'; +select sum(cityHash64(*)) from file(compressed_02735.parquet); + +-- Single-threaded encoding and Arrow encoder. +drop table if exists other_encoders_02735; +create temporary table other_encoders_02735 as select number, number*2 from numbers(10000); +insert into function file(single_thread_02735.parquet) select * from other_encoders_02735 settings max_threads = 1; +select sum(cityHash64(*)) from file(single_thread_02735.parquet); +insert into function file(arrow_02735.parquet) select * from other_encoders_02735 settings output_format_parquet_use_custom_encoder = 0; +select sum(cityHash64(*)) from file(arrow_02735.parquet); + +-- String -> binary vs string; FixedString -> fixed-length-binary vs binary vs string. 
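+-- columns.5/columns.6 of ParquetMetadata are inspected here as the physical and logical column types; per the
+-- reference, string_as_string=1 gives `BYTE_ARRAY String`, fixed_string_as_fixed_byte_array=1 gives
+-- `FIXED_LEN_BYTE_ARRAY None`, and turning both off falls back to plain `BYTE_ARRAY None`.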
+insert into function file(strings1_02735.parquet) select 'never', toFixedString('gonna', 5) settings output_format_parquet_string_as_string = 1, output_format_parquet_fixed_string_as_fixed_byte_array = 1; +select columns.5, columns.6 from file(strings1_02735.parquet, ParquetMetadata) array join columns; +insert into function file(strings2_02735.parquet) select 'give', toFixedString('you', 3) settings output_format_parquet_string_as_string = 0, output_format_parquet_fixed_string_as_fixed_byte_array = 0; +select columns.5, columns.6 from file(strings2_02735.parquet, ParquetMetadata) array join columns; +insert into function file(strings3_02735.parquet) select toFixedString('up', 2) settings output_format_parquet_string_as_string = 1, output_format_parquet_fixed_string_as_fixed_byte_array = 0; +select columns.5, columns.6 from file(strings3_02735.parquet, ParquetMetadata) array join columns; +select * from file(strings1_02735.parquet); +select * from file(strings2_02735.parquet); +select * from file(strings3_02735.parquet); + +-- DateTime64 with different units. +insert into function file(datetime64_02735.parquet) select + toDateTime64(number / 1e3, 3) as ms, + toDateTime64(number / 1e6, 6) as us, + toDateTime64(number / 1e9, 9) as ns, + toDateTime64(number / 1e2, 2) as cs, + toDateTime64(number, 0) as s, + toDateTime64(number / 1e7, 7) as dus + from numbers(2000); +desc file(datetime64_02735.parquet); +select sum(cityHash64(*)) from file(datetime64_02735.parquet); diff --git a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python index 768fb2144e3..1736807410f 100644 --- a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python +++ b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python @@ -1,216 +1,23 @@ #!/usr/bin/env python3 -import socket + import os -import uuid -import json +import sys -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def readStrict(s, size=1): - res = bytearray() - while size: - cur = s.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - -def readUInt(s, size=1): - res = readStrict(s, size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - -def readUInt8(s): - return readUInt(s) - - -def readUInt16(s): - return readUInt(s, 2) - - -def readUInt32(s): - return readUInt(s, 4) - - -def readUInt64(s): - return readUInt(s, 8) - - -def readVarUInt(s): - x = 0 - for i in range(9): - byte = readStrict(s)[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - -def readStringBinary(s): - size = readVarUInt(s) - s = readStrict(s, size) - return s.decode("utf-8") - - -def sendHello(s): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary("simple native protocol", ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary("default", 
ba) # user - writeStringBinary("", ba) # pwd - s.sendall(ba) - - -def receiveHello(s): - p_type = readVarUInt(s) - assert p_type == 0 # Hello - _server_name = readStringBinary(s) - _server_version_major = readVarUInt(s) - _server_version_minor = readVarUInt(s) - _server_revision = readVarUInt(s) - _server_timezone = readStringBinary(s) - _server_display_name = readStringBinary(s) - _server_version_patch = readVarUInt(s) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary("client_name", ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry - - -def sendQuery(s, query, settings): - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - # Settings - for key, value in settings.items(): - writeStringBinary(key, ba) - writeVarUInt(1, ba) # is_important - writeStringBinary(str(value), ba) - writeStringBinary("", ba) # End of settings - - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - s.sendall(ba) - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def sendEmptyBlock(s): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - s.sendall(ba) - - -def assertPacket(packet, expected): - assert packet == expected, "Got: {}, expected: {}".format(packet, expected) - - -def readResponse(s): - packet_type = readVarUInt(s) - if packet_type == 2: # Exception - raise RuntimeError(readException(s)) - - if packet_type == 1: # Data - return None - if packet_type == 3: # Progress - return None - if packet_type == 5: # End stream - return None - - raise RuntimeError("Unexpected packet: {}".format(packet_type)) - - -def readException(s): - code = readUInt32(s) - _name = readStringBinary(s) - text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) +from tcp_client import TCPClient def main(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery(s, "select 1", {"replication_alter_partitions_sync": 1}) + with TCPClient() as client: + client.sendQuery("select 1", {"replication_alter_partitions_sync": 1}) # external tables - sendEmptyBlock(s) + client.sendEmptyBlock() - while readResponse(s) is not None: + while client.readResponse() is not None: pass - - s.close() print("OK") diff --git a/tests/queries/0_stateless/02751_match_constant_needle.reference b/tests/queries/0_stateless/02751_match_constant_needle.reference index 
d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02751_match_constant_needle.reference +++ b/tests/queries/0_stateless/02751_match_constant_needle.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02751_match_constant_needle.sql b/tests/queries/0_stateless/02751_match_constant_needle.sql index 71bdcc7cb0a..9980c3760f3 100644 --- a/tests/queries/0_stateless/02751_match_constant_needle.sql +++ b/tests/queries/0_stateless/02751_match_constant_needle.sql @@ -1 +1,2 @@ select match('default/k8s1', '\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z'); +select match('abc123', '[a-zA-Z]+(?P\\d+)'); diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.reference b/tests/queries/0_stateless/02751_multiquery_with_argument.reference index df9771b6bd3..843fffb476c 100644 --- a/tests/queries/0_stateless/02751_multiquery_with_argument.reference +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.reference @@ -18,9 +18,6 @@ Bad arguments Bad arguments Bad arguments BAD_ARGUMENTS -Bad arguments BAD_ARGUMENTS Bad arguments Bad arguments -Bad arguments -Bad arguments diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.sh b/tests/queries/0_stateless/02751_multiquery_with_argument.sh index ce53ede3331..d742cc0ad90 100755 --- a/tests/queries/0_stateless/02751_multiquery_with_argument.sh +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.sh @@ -30,9 +30,6 @@ $CLICKHOUSE_LOCAL -n --multiquery "SELECT 307; SELECT 308;" 2>&1 | grep -o 'Bad $CLICKHOUSE_LOCAL --multiquery "SELECT 309; SELECT 310;" --multiquery 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --multiquery "SELECT 311;" --multiquery "SELECT 312;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --multiquery "SELECT 313;" -n "SELECT 314;" 2>&1 | grep -o 'BAD_ARGUMENTS' -$CLICKHOUSE_LOCAL --multiquery "SELECT 315;" --query "SELECT 316;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL -n "SELECT 320" --query "SELECT 317;" 2>&1 | grep -o 'BAD_ARGUMENTS' -$CLICKHOUSE_LOCAL --query --multiquery --multiquery "SELECT 318;" 2>&1 | grep -o 'Bad arguments' -$CLICKHOUSE_LOCAL --query --multiquery "SELECT 319;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --query -n "SELECT 400;" 2>&1 | grep -o 'Bad arguments' -$CLICKHOUSE_LOCAL --query -n --multiquery "SELECT 401;" 2>&1 | grep -o 'Bad arguments' \ No newline at end of file +$CLICKHOUSE_LOCAL --query -n --multiquery "SELECT 401;" 2>&1 | grep -o 'Bad arguments' diff --git a/tests/queries/0_stateless/02752_forbidden_headers.sql b/tests/queries/0_stateless/02752_forbidden_headers.sql index d966fc0a187..dd0dcb53b16 100644 --- a/tests/queries/0_stateless/02752_forbidden_headers.sql +++ b/tests/queries/0_stateless/02752_forbidden_headers.sql @@ -3,16 +3,24 @@ SELECT * FROM url('http://localhost:8123/', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM url('http://localhost:8123/', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM url('http://localhost:8123/', LineAsString, headers('random_header' = 'value')) FORMAT Null; SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, 
headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('random_header' = 'value')) FORMAT Null; SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('random_header' = 'value')); -- { serverError S3_ERROR } SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('random_header' = 'value')); -- { serverError S3_ERROR } diff --git a/tests/queries/0_stateless/02766_prql.sh b/tests/queries/0_stateless/02766_prql.sh index f8bbd72af4e..85b1167027c 100755 --- a/tests/queries/0_stateless/02766_prql.sh +++ b/tests/queries/0_stateless/02766_prql.sh @@ -21,18 +21,18 @@ INSERT INTO aboba (user_id, message, creation_date, metric) VALUES (101, 'Hello, SET dialect = 'prql'; from aboba -derive [ +derive { a = 2, b = s\"LEFT(message, 2)\" -] -select [ user_id, message, a, b ]; +} +select { user_id, message, a, b }; from aboba filter user_id > 101 group user_id ( - aggregate [ + aggregate { metrics = sum metric - ] + } ); SET dialect = 'clickhouse'; @@ -49,10 +49,10 @@ SELECT '---'; SET dialect = 'prql'; from aboba -select [ user_id, message, metric ] +select { user_id, message, metric } derive creation_date = s\"toTimeZone(creation_date, 'Europe/Amsterdam')\" -select [ user_id, message, creation_date, metric]; +select { user_id, message, creation_date, metric}; from s\"SELECT * FROM system.users\" | select non_existent_column; # {serverError UNKNOWN_IDENTIFIER} from non_existent_table; # {serverError UNKNOWN_TABLE} -" \ No newline 
at end of file +" diff --git a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql index ecc243b9c89..020a429c109 100644 --- a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql +++ b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql @@ -4,7 +4,7 @@ INSERT INTO test_parallel_replicas_unavailable_shards SELECT * FROM numbers(10); SYSTEM FLUSH LOGS; -SET skip_unavailable_shards=1, allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=11, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1; +SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1; SET send_logs_level='error'; SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*); diff --git a/tests/queries/0_stateless/02771_multiple_query_arguments.reference b/tests/queries/0_stateless/02771_multiple_query_arguments.reference new file mode 100644 index 00000000000..5cad23947c8 --- /dev/null +++ b/tests/queries/0_stateless/02771_multiple_query_arguments.reference @@ -0,0 +1,17 @@ +101 +101 +202 +202 +Multi-statements are not allowed +Empty query +Bad arguments +Syntax error +101 +101 +202 +202 +303 +303 +303 +Bad arguments +Syntax error diff --git a/tests/queries/0_stateless/02771_multiple_query_arguments.sh b/tests/queries/0_stateless/02771_multiple_query_arguments.sh new file mode 100755 index 00000000000..435e0a33315 --- /dev/null +++ b/tests/queries/0_stateless/02771_multiple_query_arguments.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# clickhouse-client +$CLICKHOUSE_CLIENT --query "SELECT 101" --query "SELECT 101" +$CLICKHOUSE_CLIENT --query "SELECT 202;" --query "SELECT 202;" +$CLICKHOUSE_CLIENT --query "SELECT 303" --query "SELECT 303; SELECT 303" 2>&1 | grep -o 'Multi-statements are not allowed' +$CLICKHOUSE_CLIENT --query "" --query "" 2>&1 | grep -o 'Empty query' +$CLICKHOUSE_CLIENT --query "SELECT 303" --query 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_CLIENT --query "SELECT 303" --query "SELE" 2>&1 | grep -o 'Syntax error' + +# clickhouse-local +$CLICKHOUSE_LOCAL --query "SELECT 101" --query "SELECT 101" +$CLICKHOUSE_LOCAL --query "SELECT 202;" --query "SELECT 202;" +$CLICKHOUSE_LOCAL --query "SELECT 303" --query "SELECT 303; SELECT 303" 2>&1 # behaves differently than clickhouse-client, TODO make it consistent +$CLICKHOUSE_LOCAL --query "" --query "" 2>&1 # behaves equally different than clickhouse-client TODO +$CLICKHOUSE_LOCAL --query "SELECT 303" --query 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --query "SELECT 303" --query "SELE" 2>&1 | grep -o 'Syntax error' diff --git a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference index 5242c625325..85a66dfb34c 100644 --- a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference +++ b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference @@ -12,12 +12,12 @@ ExpressionTransform × 2 FilterSortedStreamByRange × 2 Description: filter values in [(999424), +inf) ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 AggregatingSortedTransform FilterSortedStreamByRange Description: filter values in [-inf, (999424)) ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 EXPLAIN PIPELINE SELECT * FROM data FINAL WHERE v1 >= now() - INTERVAL 180 DAY SETTINGS max_threads=2, max_final_threads=2, force_data_skipping_indices='v1_index', use_skip_indexes_if_final=0 FORMAT LineAsString; @@ -31,9 +31,9 @@ ExpressionTransform × 2 FilterSortedStreamByRange × 2 Description: filter values in [(999424), +inf) ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 AggregatingSortedTransform FilterSortedStreamByRange Description: filter values in [-inf, (999424)) ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference index 872a5dd1d7d..5c3cab9bf4a 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference @@ -1,5 +1,7 @@ 2021-12-31 23:00:00 0 2021-12-31 23:00:00 0 +2021-12-31 23:00:00 0 +2021-12-31 23:00:00 0 Date 2 3 @@ -13,6 +15,18 @@ Date 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 DateTime 2 3 @@ -26,6 +40,18 @@ DateTime 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 Date32 2 3 @@ -39,6 +65,18 @@ Date32 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 DateTime64 2 3 @@ -52,3 +90,15 @@ DateTime64 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql index 0a2fa6cc93b..4da8cebff1c 100644 
--- a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql @@ -11,6 +11,8 @@ INSERT INTO source values ('2021-12-31 23:00:00', 0); SELECT * FROM source WHERE toYYYYMM(ts) = 202112; SELECT * FROM source WHERE toYear(ts) = 2021; +SELECT * FROM source WHERE toYYYYMM(ts) = 202112 SETTINGS allow_experimental_analyzer=1; +SELECT * FROM source WHERE toYear(ts) = 2021 SETTINGS allow_experimental_analyzer=1; DROP TABLE IF EXISTS source; CREATE TABLE source @@ -44,6 +46,18 @@ SELECT count(*) FROM source WHERE toYear(dt) < 2023; SELECT count(*) FROM source WHERE toYear(dt) <= 2023; SELECT count(*) FROM source WHERE toYear(dt) > 2023; SELECT count(*) FROM source WHERE toYear(dt) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(dt) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) >= 2023 SETTINGS allow_experimental_analyzer=1; SELECT 'DateTime'; SELECT count(*) FROM source WHERE toYYYYMM(ts) = 202312; @@ -58,6 +72,18 @@ SELECT count(*) FROM source WHERE toYear(ts) < 2023; SELECT count(*) FROM source WHERE toYear(ts) <= 2023; SELECT count(*) FROM source WHERE toYear(ts) > 2023; SELECT count(*) FROM source WHERE toYear(ts) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(ts) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) >= 2023 SETTINGS allow_experimental_analyzer=1; SELECT 'Date32'; SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312; @@ -72,6 +98,18 @@ SELECT count(*) FROM source WHERE toYear(dt_32) < 2023; SELECT count(*) FROM source WHERE 
toYear(dt_32) <= 2023; SELECT count(*) FROM source WHERE toYear(dt_32) > 2023; SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023 SETTINGS allow_experimental_analyzer=1; SELECT 'DateTime64'; SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312; @@ -86,4 +124,16 @@ SELECT count(*) FROM source WHERE toYear(ts_64) < 2023; SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023; SELECT count(*) FROM source WHERE toYear(ts_64) > 2023; SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023 SETTINGS allow_experimental_analyzer=1; DROP TABLE source; diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql index 38e495fdd4e..c67722393ab 100644 --- a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql @@ -1,3 +1,5 @@ +SET session_timezone = 'UTC'; + SELECT 'The reference time point is 2023-06-30 23:59:30'; SELECT '───────────────────────────────────────────────'; SELECT 'The argument is before the reference time point'; diff --git a/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql b/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql index 
6bf287a3d77..2aa23e2b70d 100644 --- a/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql +++ b/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql @@ -1,3 +1,5 @@ +DROP TABLE IF EXISTS events; + create table events ( `organisation_id` UUID, `session_id` UUID, diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference new file mode 100644 index 00000000000..0fd2f694aeb --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference @@ -0,0 +1,1122 @@ +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, 
constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 
+SELECT value1 +FROM date_t +WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, 
source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1998-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 18, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 22, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 23, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 24, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 26, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 27, constant_value: \'1995-01-01\', constant_value_type: String + FUNCTION id: 28, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 29, nodes: 2 + FUNCTION id: 30, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 31, nodes: 2 + COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 33, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 34, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 35, nodes: 2 + COLUMN 
id: 32, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 36, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT + value1, + toYear(date1) AS year1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + year1 UInt16 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + FUNCTION id: 4, function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 7, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + FUNCTION id: 9, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + FUNCTION id: 11, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 18, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 19, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + FUNCTION id: 21, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 25, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 26, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 27, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, 
constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + PREWHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1994-01-01\', constant_value_type: String + WHERE + FUNCTION id: 14, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + FUNCTION id: 16, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 19, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 20, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 10, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 12, constant_value: UInt64_3, constant_value_type: UInt8 + HAVING + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 18, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 19, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 21, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 22, constant_value: \'1994-01-01\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 
+SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: toYYYYMM, function_type: ordinary, result_type: UInt32 + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_199300, constant_value_type: UInt32 + FUNCTION id: 12, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 17, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 18, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: toYYYYMM, function_type: ordinary, result_type: UInt32 + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_199313, constant_value_type: UInt32 + FUNCTION id: 12, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 17, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 18, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, 
result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + 
LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 
+ ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 
3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 12, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 17, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 22, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 27, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 28, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 29, nodes: 2 + COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 30, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, 
result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date32_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, 
function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date32_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime64_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', 
constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime64_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.sql new file mode 100644 index 00000000000..9cc8dd74e5d --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.sql @@ -0,0 +1,75 @@ +DROP TABLE IF EXISTS date_t; +CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 
AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 
AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE date_t; + +DROP TABLE IF EXISTS datetime_t; +CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE datetime_t; + +DROP TABLE IF EXISTS date32_t; +CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE date32_t; + +DROP TABLE IF EXISTS datetime64_t; +CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime64_t WHERE 
toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE datetime64_t; diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference deleted file mode 100644 index 9235e7e106a..00000000000 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference +++ /dev/null @@ -1,87 +0,0 @@ -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) -SELECT - value1, - toYear(date1) AS year1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) -SELECT value1 -FROM date_t -WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime_t -WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime_t -WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date32_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date32_t -WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM 
datetime64_t -WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime64_t -WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql deleted file mode 100644 index 266be59b0a3..00000000000 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql +++ /dev/null @@ -1,47 +0,0 @@ -DROP TABLE IF EXISTS date_t; -CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; -DROP TABLE date_t; - -DROP TABLE IF EXISTS datetime_t; -CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE datetime_t; - -DROP TABLE IF EXISTS date32_t; -CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; 
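-- A minimal sketch (using the throwaway name date_t_sketch, not a table from the patch) of the rewrite
-- that the reference files above assert: on a Date column the pass is expected to replace
-- toYear(date1) = 1993 with date1 >= '1993-01-01' AND date1 < '1994-01-01', and
-- toYYYYMM(date1) = 199312 with date1 >= '1993-12-01' AND date1 < '1994-01-01', so the comparison
-- becomes a plain range over the column. Assuming a build where the experimental analyzer is available:
DROP TABLE IF EXISTS date_t_sketch;
CREATE TABLE date_t_sketch (id UInt32, value1 String, date1 Date) ENGINE = ReplacingMergeTree() ORDER BY id;
EXPLAIN SYNTAX SELECT value1 FROM date_t_sketch WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3;
EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t_sketch WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1;
DROP TABLE date_t_sketch;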
-EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE date32_t; - -DROP TABLE IF EXISTS datetime64_t; -CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE datetime64_t; diff --git a/tests/queries/0_stateless/02787_transform_null.reference b/tests/queries/0_stateless/02787_transform_null.reference index a650dbbd173..a84dd83d712 100644 --- a/tests/queries/0_stateless/02787_transform_null.reference +++ b/tests/queries/0_stateless/02787_transform_null.reference @@ -5,5 +5,5 @@ ONE a a \N 0 \N 0 \N -1 1 1 \N 1 1 +1 1 1 1 1 1 a \N 3 3 3 3 diff --git a/tests/queries/0_stateless/02788_current_schemas_function.reference b/tests/queries/0_stateless/02788_current_schemas_function.reference new file mode 100644 index 00000000000..999c306b3ac --- /dev/null +++ b/tests/queries/0_stateless/02788_current_schemas_function.reference @@ -0,0 +1,2 @@ +['default'] +['default'] diff --git a/tests/queries/0_stateless/02788_current_schemas_function.sql b/tests/queries/0_stateless/02788_current_schemas_function.sql new file mode 100644 index 00000000000..408b21c0e34 --- /dev/null +++ b/tests/queries/0_stateless/02788_current_schemas_function.sql @@ -0,0 +1,4 @@ +SELECT current_schemas(true) AS result; +SELECT current_schemas(false) AS result; +SELECT current_schemas(1); -- { serverError 43 } +SELECT current_schemas(); -- { serverError 42 } \ No newline at end of file diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference index e202a38f068..aa18817f4e6 100644 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference @@ -36,6 +36,14 @@ exception_code: 0 system.query_views_log +system.part_log +Row 1: +────── +database: default +table: async_insert_landing +partition_id: all +rows: 4 + system.query_log Row 1: ────── @@ -85,6 +93,14 @@ written_bytes: 0 status: QueryFinish exception_code: 0 +system.part_log +Row 1: +────── +database: default +table: async_insert_landing +partition_id: all +rows: 3 + system.query_log Row 1: ────── @@ -133,3 +149,11 @@ written_rows: 0 written_bytes: 0 status: Exc*****onWhileProcessing exception_code: 395 + +system.part_log +Row 1: +────── +database: default +table: async_insert_landing +partition_id: all +rows: 3 diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh index d1c8fe122cb..1ff97031acb 100755 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh @@ -28,7 +28,7 @@ function print_flush_query_logs() FROM system.query_log WHERE event_date >= yesterday() - AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') -- AND current_database = currentDatabase() -- Just to silence style check: this is not ok for this test since the query uses default values ORDER BY type DESC FORMAT Vertical" @@ -50,7 +50,21 @@ function 
print_flush_query_logs() FROM system.query_views_log WHERE event_date >= yesterday() - AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') + FORMAT Vertical" + + echo "" + echo "system.part_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + database, + table, + partition_id, + rows + FROM system.part_log + WHERE + event_date >= yesterday() + AND query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') FORMAT Vertical" } diff --git a/tests/queries/0_stateless/02790_client_max_opening_fd.reference b/tests/queries/0_stateless/02790_client_max_opening_fd.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02790_client_max_opening_fd.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02790_client_max_opening_fd.sh b/tests/queries/0_stateless/02790_client_max_opening_fd.sh new file mode 100755 index 00000000000..ecc05d32050 --- /dev/null +++ b/tests/queries/0_stateless/02790_client_max_opening_fd.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Ensure that clickhouse-client does not open a large number of files. +ulimit -n 1024 +${CLICKHOUSE_CLIENT} --query "SELECT 1" diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference new file mode 100644 index 00000000000..ef578b526da --- /dev/null +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference @@ -0,0 +1,36 @@ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name──────────┬─department─┬─salary─┐ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +│ Grace │ it │ 90 │ +│ Emma │ it │ 84 │ +└───────────────┴────────────┴────────┘ +┌─name──────────┬─department─┬─salary─┐ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +│ Grace │ it │ 90 │ +│ Emma │ it │ 84 │ +└───────────────┴────────────┴────────┘ diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql new file mode 100644 index 00000000000..638cc66682d --- /dev/null +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql @@ -0,0 +1,34 @@ +-- https://antonz.org/sql-fetch/ + +CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64); +INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 
90), (21, 'Emma', 'it', 84); + +-- Determinism +SET max_threads = 1, parallelize_output_from_storages = 0; + +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +order by salary desc +limit 5 +format PrettyCompactNoEscapes; + +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +order by salary desc +fetch first 5 rows only +format PrettyCompactNoEscapes; + +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +order by salary desc +fetch first 5 rows with ties +format PrettyCompactNoEscapes; + +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +order by salary desc +offset 3 rows +fetch next 5 rows only +format PrettyCompactNoEscapes; + +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +order by salary desc +offset 3 rows +fetch first 5 rows only +format PrettyCompactNoEscapes; diff --git a/tests/queries/0_stateless/02792_alter_table_modify_comment.reference b/tests/queries/0_stateless/02792_alter_table_modify_comment.reference new file mode 100644 index 00000000000..b148993285c --- /dev/null +++ b/tests/queries/0_stateless/02792_alter_table_modify_comment.reference @@ -0,0 +1,20 @@ +Hello +World +Hello +World +Hello +World +Hello +World +Hello +World + +World + +World + +World + +World + +World diff --git a/tests/queries/0_stateless/02792_alter_table_modify_comment.sql b/tests/queries/0_stateless/02792_alter_table_modify_comment.sql new file mode 100644 index 00000000000..ba30833905d --- /dev/null +++ b/tests/queries/0_stateless/02792_alter_table_modify_comment.sql @@ -0,0 +1,72 @@ +-- Tags: no-replicated-database +-- Tag no-replicated-database: Unsupported type of ALTER query + +DROP TABLE IF EXISTS t; + +# Memory, MergeTree, and ReplicatedMergeTree + +CREATE TABLE t (x UInt8) ENGINE = Memory COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY () COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +# The case when there are many operations in one ALTER + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY () COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +# 
Note that the table comment is not replicated. We can implement it later. + +CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table1/t', '1') ORDER BY () COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t SYNC; + +CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table2/t', '1') ORDER BY () COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t SYNC; + +# The cases when there is no comment on creation + +CREATE TABLE t (x UInt8) ENGINE = Memory; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY (); +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY (); +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table3/t', '1') ORDER BY (); +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t SYNC; + +CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table4/t', '1') ORDER BY (); +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t SYNC; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.reference b/tests/queries/0_stateless/02792_drop_projection_lwd.reference new file mode 100644 index 00000000000..3ad5abd03ae --- /dev/null +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.reference @@ -0,0 +1 @@ +99 diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql new file mode 100644 index 00000000000..a1d8a9c90f3 --- /dev/null +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -0,0 +1,20 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_projections_lwd; + +CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER BY b)) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); + +-- LWD does not work, as expected +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError BAD_ARGUMENTS } +KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' 
SYNC FORMAT Null; + +-- drop projection +ALTER TABLE t_projections_lwd DROP projection p; + +DELETE FROM t_projections_lwd WHERE a = 2; + +SELECT count() FROM t_projections_lwd; + +DROP TABLE t_projections_lwd; diff --git a/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql b/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql index 9d9d7a3abd5..cb26a6bce4f 100644 --- a/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql +++ b/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql @@ -13,7 +13,7 @@ CREATE TABLE fx_1m ( ) ENGINE = MergeTree() PARTITION BY toYear(dt_close) -ORDER BY (symbol, dt_close); +ORDER BY (symbol, dt_close) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- add projection ALTER TABLE fx_1m diff --git a/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.reference b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql new file mode 100644 index 00000000000..e204d968382 --- /dev/null +++ b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS v; + +create view v (s LowCardinality(String), n UInt8) as select 'test' as s, toUInt8(number) as n from numbers(10000000); + +-- this is what allows mem usage to go really high +set max_block_size=10000000000; + +set max_memory_usage = '1Gi'; + +select s, sum(n) from v group by s format Null; + +DROP TABLE v; diff --git a/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference b/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference new file mode 100644 index 00000000000..45ea5a7a29f --- /dev/null +++ b/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference @@ -0,0 +1,4 @@ +2 (1,2) 3 +2 (1,2) 3 +0 (0,0) 0 +42 (42,42) 42 diff --git a/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh b/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh new file mode 100755 index 00000000000..767acf68553 --- /dev/null +++ b/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME + +$CLICKHOUSE_LOCAL -q "select ((1, 2), 3)::Tuple(b Tuple(c UInt32, d UInt32), e UInt32) as a format TSV" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select a.b.d, a.b, a.e from file('$DATA_FILE', TSV, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + +$CLICKHOUSE_LOCAL -q "select ((1, 2), 3)::Tuple(b Tuple(c UInt32, d UInt32), e UInt32) as a format JSONEachRow" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select a.b.d, a.b, a.e from file('$DATA_FILE', JSONEachRow, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" +$CLICKHOUSE_LOCAL -q "select x.b.d, x.b, x.e from file('$DATA_FILE', JSONEachRow, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" +$CLICKHOUSE_LOCAL -q "select x.b.d, x.b, x.e from file('$DATA_FILE', JSONEachRow, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference index 6fc36a0ba01..ffb61b54f9a 100644 --- a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference +++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference @@ -1,11 +1,11 @@ - explain + explain - (Expression) - ExpressionTransform - (Aggregating) - FinalizeAggregatedTransform - AggregatingInOrderTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 + (Expression) + ExpressionTransform + (Aggregating) + FinalizeAggregatedTransform + AggregatingInOrderTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 diff --git a/tests/queries/0_stateless/02798_substring_index.reference b/tests/queries/0_stateless/02798_substring_index.reference new file mode 100644 index 00000000000..a3084509c12 --- /dev/null +++ b/tests/queries/0_stateless/02798_substring_index.reference @@ -0,0 +1,155 @@ +-- { echoOn } +select substringIndex('www.clickhouse.com', '.', -4); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', -3); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', -2); +clickhouse.com +select substringIndex('www.clickhouse.com', '.', -1); +com +select substringIndex('www.clickhouse.com', '.', 0); + +select substringIndex('www.clickhouse.com', '.', 1); +www +select substringIndex('www.clickhouse.com', '.', 2); +www.clickhouse +select substringIndex('www.clickhouse.com', '.', 3); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', 4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -3); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -2); +clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -1); +com +select substringIndex(materialize('www.clickhouse.com'), '.', 0); + +select substringIndex(materialize('www.clickhouse.com'), '.', 1); +www +select substringIndex(materialize('www.clickhouse.com'), '.', 2); +www.clickhouse +select substringIndex(materialize('www.clickhouse.com'), '.', 3); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', 4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3)); 
+www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2)); +clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1)); +com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0)); + +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1)); +www +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2)); +www.clickhouse +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-4)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-3)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-2)); +clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-1)); +com +select substringIndex('www.clickhouse.com', '.', materialize(0)); + +select substringIndex('www.clickhouse.com', '.', materialize(1)); +www +select substringIndex('www.clickhouse.com', '.', materialize(2)); +www.clickhouse +select substringIndex('www.clickhouse.com', '.', materialize(3)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(4)); +www.clickhouse.com +select SUBSTRING_INDEX('www.clickhouse.com', '.', 2); +www.clickhouse +select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select substringIndexUTF8('富强,民主,文明', ',', -4); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -3); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -2); +民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -1); +文明 +select substringIndexUTF8('富强,民主,文明', ',', 0); + +select substringIndexUTF8('富强,民主,文明', ',', 1); +富强 +select substringIndexUTF8('富强,民主,文明', ',', 2); +富强,民主 +select substringIndexUTF8('富强,民主,文明', ',', 3); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', 4); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2); +民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1); +文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1); +富强 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2); +富强,民主 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-4)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-3)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-2)); +民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-1)); +文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(0)); + +select substringIndexUTF8('富强,民主,文明', ',', materialize(1)); +富强 +select substringIndexUTF8('富强,民主,文明', ',', materialize(2)); +富强,民主 +select substringIndexUTF8('富强,民主,文明', ',', materialize(3)); +富强,民主,文明 +select 
substringIndexUTF8('富强,民主,文明', ',', materialize(4)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2)); +民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1)); +文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0)); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1)); +富强 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2)); +富强,民主 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02798_substring_index.sql b/tests/queries/0_stateless/02798_substring_index.sql new file mode 100644 index 00000000000..520775e8970 --- /dev/null +++ b/tests/queries/0_stateless/02798_substring_index.sql @@ -0,0 +1,93 @@ +-- { echoOn } +select substringIndex('www.clickhouse.com', '.', -4); +select substringIndex('www.clickhouse.com', '.', -3); +select substringIndex('www.clickhouse.com', '.', -2); +select substringIndex('www.clickhouse.com', '.', -1); +select substringIndex('www.clickhouse.com', '.', 0); +select substringIndex('www.clickhouse.com', '.', 1); +select substringIndex('www.clickhouse.com', '.', 2); +select substringIndex('www.clickhouse.com', '.', 3); +select substringIndex('www.clickhouse.com', '.', 4); + +select substringIndex(materialize('www.clickhouse.com'), '.', -4); +select substringIndex(materialize('www.clickhouse.com'), '.', -3); +select substringIndex(materialize('www.clickhouse.com'), '.', -2); +select substringIndex(materialize('www.clickhouse.com'), '.', -1); +select substringIndex(materialize('www.clickhouse.com'), '.', 0); +select substringIndex(materialize('www.clickhouse.com'), '.', 1); +select substringIndex(materialize('www.clickhouse.com'), '.', 2); +select substringIndex(materialize('www.clickhouse.com'), '.', 3); +select substringIndex(materialize('www.clickhouse.com'), '.', 4); + +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4)); + +select substringIndex('www.clickhouse.com', '.', materialize(-4)); +select substringIndex('www.clickhouse.com', '.', materialize(-3)); +select substringIndex('www.clickhouse.com', '.', materialize(-2)); +select substringIndex('www.clickhouse.com', '.', materialize(-1)); +select 
substringIndex('www.clickhouse.com', '.', materialize(0)); +select substringIndex('www.clickhouse.com', '.', materialize(1)); +select substringIndex('www.clickhouse.com', '.', materialize(2)); +select substringIndex('www.clickhouse.com', '.', materialize(3)); +select substringIndex('www.clickhouse.com', '.', materialize(4)); + +select SUBSTRING_INDEX('www.clickhouse.com', '.', 2); + +select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select substringIndexUTF8('富强,民主,文明', ',', -4); +select substringIndexUTF8('富强,民主,文明', ',', -3); +select substringIndexUTF8('富强,民主,文明', ',', -2); +select substringIndexUTF8('富强,民主,文明', ',', -1); +select substringIndexUTF8('富强,民主,文明', ',', 0); +select substringIndexUTF8('富强,民主,文明', ',', 1); +select substringIndexUTF8('富强,民主,文明', ',', 2); +select substringIndexUTF8('富强,民主,文明', ',', 3); +select substringIndexUTF8('富强,民主,文明', ',', 4); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4); + +select substringIndexUTF8('富强,民主,文明', ',', materialize(-4)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-3)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-2)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-1)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(0)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(1)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(2)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(3)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(4)); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4)); + +select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- { echoOff } diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh index 31a7cc3b410..b8ee97a7c7d 100755 --- 
a/tests/queries/0_stateless/02801_backup_native_copy.sh +++ b/tests/queries/0_stateless/02801_backup_native_copy.sh @@ -29,14 +29,14 @@ $CLICKHOUSE_CLIENT -nm -q " " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' diff --git a/tests/queries/0_stateless/02803_backup_tmp_files.reference b/tests/queries/0_stateless/02803_backup_tmp_files.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02803_backup_tmp_files.sh b/tests/queries/0_stateless/02803_backup_tmp_files.sh new file mode 100755 index 00000000000..d86beae4923 --- /dev/null +++ b/tests/queries/0_stateless/02803_backup_tmp_files.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag: no-fasttest - requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_disk'; + insert into data select * from numbers(10); +" + +$CLICKHOUSE_CLIENT --format Null -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data')" +$CLICKHOUSE_CLIENT --format Null -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data')" diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index a43cd6deb9e..333bc1bc25d 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_s3_disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +SETTINGS disk = disk(name = 's3_disk', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ 2>&1 | grep -q "Disk with name \`s3_disk\` already exist" && echo 'OK' || echo 'FAIL' disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" @@ -25,7 +25,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference new file mode 100644 index 00000000000..d2c8c83d997 --- /dev/null +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference @@ -0,0 +1,5 @@ +OK +1 +0 +1 +0 diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh new file mode 100755 index 00000000000..b44f9e50513 --- /dev/null +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +# set -x + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -nm --query """ +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); + +INSERT INTO test SELECT 1, 'test'; +""" + +query_id=$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY kek; +""" 2>&1 | grep -q "Invalid cache key hex: kek" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -q " system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +offset=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key OFFSET $offset; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""" + +query_id=$RANDOM$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +${CLICKHOUSE_CLIENT} -q " system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key'; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key'; +""" diff --git a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference new file mode 100644 index 00000000000..7614df8ec46 --- /dev/null +++ b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference @@ -0,0 +1,3 @@ +ok +ok +ok diff --git a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh new file mode 100755 index 00000000000..c88ba4d5a74 --- /dev/null +++ b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +# set -x + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm --query """ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, delayed_cleanup_interval_ms = 10000000, disk = s3_disk), min_bytes_for_wide_part = 10485760; + +INSERT INTO test SELECT 1, 'test'; +""" + +query_id=$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +${CLICKHOUSE_CLIENT} -q "system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +offset=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +path=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT cache_path FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""") + +rm $path + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" 2>&1 | grep -F -e "No such file or directory" > /dev/null && echo "ok" || echo "fail" + +CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') + +$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" 2>&1 | grep -q "$key" && echo 'ok' || echo 'fail' + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +offset=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +path=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT cache_path FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""") + +echo -n 'fff' > $path + +#cat $path + +$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" 2>&1 | grep -q "$key" && echo 'ok' || echo 'fail' + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02811_ip_dict_attribute.reference b/tests/queries/0_stateless/02811_ip_dict_attribute.reference new file mode 100644 index 00000000000..a36cf2e3d8a --- /dev/null +++ b/tests/queries/0_stateless/02811_ip_dict_attribute.reference @@ -0,0 +1,2 @@ +('::2.2.2.2','2.2.2.2') +('::1.1.1.1','1.1.1.1') diff --git a/tests/queries/0_stateless/02811_ip_dict_attribute.sql b/tests/queries/0_stateless/02811_ip_dict_attribute.sql new file mode 100644 index 00000000000..0ffff6e4a53 --- /dev/null +++ b/tests/queries/0_stateless/02811_ip_dict_attribute.sql @@ -0,0 +1,13 @@ +CREATE TABLE src ( id UInt64, ip4 IPv4, ip6 IPv6 ) Engine=Memory AS + SELECT * FROM VALUES( (1, '1.1.1.1', '::1.1.1.1'), (2, '2.2.2.2', '::2.2.2.2') ); + +CREATE DICTIONARY dict ( id UInt64, ip4 IPv4, ip6 IPv6 ) + PRIMARY KEY id + LAYOUT(HASHED()) + SOURCE (CLICKHOUSE ( table src)) + lifetime ( 10); + +SELECT dictGet('dict', ('ip6', 'ip4'), arrayJoin([2,1])); + +DROP DICTIONARY dict; +DROP TABLE src; diff --git a/tests/queries/0_stateless/02812_csv_date_time_with_comma.reference b/tests/queries/0_stateless/02812_csv_date_time_with_comma.reference new file mode 100644 index 00000000000..f569df13dc1 --- /dev/null 
+++ b/tests/queries/0_stateless/02812_csv_date_time_with_comma.reference @@ -0,0 +1,2 @@ +2000-01-01 00:00:00 abc +2000-01-01 00:00:00.000 abc diff --git a/tests/queries/0_stateless/02812_csv_date_time_with_comma.sql b/tests/queries/0_stateless/02812_csv_date_time_with_comma.sql new file mode 100644 index 00000000000..ecd3cff6ad0 --- /dev/null +++ b/tests/queries/0_stateless/02812_csv_date_time_with_comma.sql @@ -0,0 +1,3 @@ +select * from format(CSV, 'c1 DateTime, c2 String', '01-01-2000,abc') settings date_time_input_format='best_effort'; +select * from format(CSV, 'c1 DateTime64(3), c2 String', '01-01-2000,abc') settings date_time_input_format='best_effort'; + diff --git a/tests/queries/0_stateless/02812_from_to_utc_timestamp.reference b/tests/queries/0_stateless/02812_from_to_utc_timestamp.reference new file mode 100644 index 00000000000..91c52ebb7c3 --- /dev/null +++ b/tests/queries/0_stateless/02812_from_to_utc_timestamp.reference @@ -0,0 +1,3 @@ +1 2023-03-16 12:22:33 2023-03-16 10:22:33.000 2023-03-15 16:00:00 2023-03-16 19:22:33.000 +2 2023-03-16 12:22:33 2023-03-16 10:22:33.000 2023-03-16 03:22:33 2023-03-16 08:00:00.000 +3 2023-03-16 12:22:33 2023-03-16 10:22:33.000 2023-03-16 03:22:33 2023-03-16 19:22:33.123 diff --git a/tests/queries/0_stateless/02812_from_to_utc_timestamp.sh b/tests/queries/0_stateless/02812_from_to_utc_timestamp.sh new file mode 100755 index 00000000000..59a6399ee2f --- /dev/null +++ b/tests/queries/0_stateless/02812_from_to_utc_timestamp.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (x UInt32, y DateTime, z DateTime64) engine=MergeTree order by x" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl values(1, '2023-03-16', '2023-03-16 11:22:33')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl values(2, '2023-03-16 11:22:33', '2023-03-16')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl values(3, '2023-03-16 11:22:33', '2023-03-16 11:22:33.123456')" +$CLICKHOUSE_CLIENT -q "select x, to_utc_timestamp(toDateTime('2023-03-16 11:22:33'), 'Etc/GMT+1'), from_utc_timestamp(toDateTime64('2023-03-16 11:22:33', 3), 'Etc/GMT+1'), to_utc_timestamp(y, 'Asia/Shanghai'), from_utc_timestamp(z, 'Asia/Shanghai') from test_tbl order by x" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/02812_pointwise_array_operations.reference b/tests/queries/0_stateless/02812_pointwise_array_operations.reference new file mode 100644 index 00000000000..3b1b973fd3f --- /dev/null +++ b/tests/queries/0_stateless/02812_pointwise_array_operations.reference @@ -0,0 +1,23 @@ +[2,5] +[2,6] +[4.5,5,12,10.1] +[(11.1,5.4),(6,21)] +[[13,2],[3]] +[2,2] +[2,3] +[2,4] +[2,5] +[2,6] +[2,2] +[2,3] +[2,4] +[2,5] +[2,6] +[0,0,0] +[(NULL,100000000000000000000),(NULL,1048833)] +[2,2] +[2,3] +[2,4] +[2,5] +[2,6] +[11,1,-2] diff --git a/tests/queries/0_stateless/02812_pointwise_array_operations.sql b/tests/queries/0_stateless/02812_pointwise_array_operations.sql new file mode 100644 index 00000000000..e28c4bda347 --- /dev/null +++ b/tests/queries/0_stateless/02812_pointwise_array_operations.sql @@ -0,0 +1,18 @@ +SELECT (materialize([1,1]) + materialize([1,4])); +SELECT ([1,2] + [1,4]); +SELECT ([2.5, 1, 3, 10.1] + [2, 4, 9, 0]); +SELECT ([(1,3), (2,9)] + [(10.1, 2.4), 
(4,12)]); +SELECT ([[1,1],[2]]+[[12,1],[1]]); +SELECT ([1,2]+[1,number]) from numbers(5); +SELECT ([1,2::UInt64]+[1,number]) from numbers(5); +SELECT ([materialize(1),materialize(2),materialize(3)]-[1,2,3]); +SELECT [(NULL, 256), (NULL, 256)] + [(1., 100000000000000000000.), (NULL, 1048577)]; +SELECT ([1,2::UInt64]+[1,number]) from numbers(5); +CREATE TABLE my_table (values Array(Int32)) ENGINE = MergeTree() ORDER BY values; +INSERT INTO my_table (values) VALUES ([12, 3, 1]); +SELECT values - [1,2,3] FROM my_table WHERE arrayExists(x -> x > 5, values); +SELECT ([12,13] % [5,6]); -- { serverError 43 } +SELECT ([2,3,4]-[1,-2,10,29]); -- { serverError 190 } +CREATE TABLE a ( x Array(UInt64), y Array(UInt64)) ENGINE = Memory; +INSERT INTO a VALUES ([2,3],[4,5]),([1,2,3], [4,5]),([6,7],[8,9,10]); +SELECT x, y, x+y FROM a; -- { serverError 190 } diff --git a/tests/queries/0_stateless/02813_any_value.reference b/tests/queries/0_stateless/02813_any_value.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02813_any_value.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02813_any_value.sql b/tests/queries/0_stateless/02813_any_value.sql new file mode 100644 index 00000000000..6bd2b66fde1 --- /dev/null +++ b/tests/queries/0_stateless/02813_any_value.sql @@ -0,0 +1,3 @@ +SET max_block_size = 10, max_threads = 1; +select any_value(number) from numbers(10); +select aNy_VaLue(number) from numbers(10); diff --git a/tests/queries/0_stateless/02813_avro_union_with_one_type.reference b/tests/queries/0_stateless/02813_avro_union_with_one_type.reference new file mode 100644 index 00000000000..c65bed48055 --- /dev/null +++ b/tests/queries/0_stateless/02813_avro_union_with_one_type.reference @@ -0,0 +1,5 @@ +name String +favorite_number Int32 +favorite_color String +Alyssa 256 yellow +Ben 7 red diff --git a/tests/queries/0_stateless/02813_avro_union_with_one_type.sh b/tests/queries/0_stateless/02813_avro_union_with_one_type.sh new file mode 100755 index 00000000000..b58dc9126da --- /dev/null +++ b/tests/queries/0_stateless/02813_avro_union_with_one_type.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro + +$CLICKHOUSE_LOCAL -q "desc file('$DATA_DIR/union_one_type.avro')" +$CLICKHOUSE_LOCAL -q "select * from file('$DATA_DIR/union_one_type.avro')" diff --git a/tests/queries/0_stateless/02813_float_parsing.reference b/tests/queries/0_stateless/02813_float_parsing.reference new file mode 100644 index 00000000000..c83331e0138 --- /dev/null +++ b/tests/queries/0_stateless/02813_float_parsing.reference @@ -0,0 +1,2 @@ +1.7090999999999998 15008753.000000002 6.000000000000001e-9 6.000000000000002e-9 1.7091 15008752 5.9999996e-9 5.9999996e-9 +1.7091 15008753 6e-9 6.000000000000001e-9 1.7091 15008753 6e-9 6e-9 diff --git a/tests/queries/0_stateless/02813_float_parsing.sql b/tests/queries/0_stateless/02813_float_parsing.sql new file mode 100644 index 00000000000..ba57b87f191 --- /dev/null +++ b/tests/queries/0_stateless/02813_float_parsing.sql @@ -0,0 +1,21 @@ +SELECT + toFloat64('1.7091'), + toFloat64('1.5008753E7'), + toFloat64('6e-09'), + toFloat64('6.000000000000001e-9'), + toFloat32('1.7091'), + toFloat32('1.5008753E7'), + toFloat32('6e-09'), + toFloat32('6.000000000000001e-9') +SETTINGS precise_float_parsing = 0; + +SELECT + toFloat64('1.7091'), + toFloat64('1.5008753E7'), + toFloat64('6e-09'), + toFloat64('6.000000000000001e-9'), + toFloat32('1.7091'), + toFloat32('1.5008753E7'), + toFloat32('6e-09'), + toFloat32('6.000000000000001e-9') +SETTINGS precise_float_parsing = 1; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference new file mode 100644 index 00000000000..d19222b55ec --- /dev/null +++ b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference @@ -0,0 +1,31 @@ +--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 +--- Verify that ReplacingMergeTree properly handles _is_deleted: +--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. +-- { echoOn } + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `account_id` UInt64, + `_is_deleted` UInt8, + `_version` UInt64 +) +ENGINE = ReplacingMergeTree(_version, _is_deleted) +ORDER BY (account_id); +INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); +-- Mark the first 100 rows as deleted. +INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); +-- Put everything in one partition +OPTIMIZE TABLE t FINAL; +SELECT count() FROM t; +1000 +SELECT count() FROM t FINAL; +900 +-- Both should produce the same number of rows. +-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, +-- as if no rows were deleted. +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; +900 +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; +900 +DROP TABLE t; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql new file mode 100644 index 00000000000..a89a1ff590a --- /dev/null +++ b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql @@ -0,0 +1,32 @@ +--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 +--- Verify that ReplacingMergeTree properly handles _is_deleted: +--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. 
+-- { echoOn } + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `account_id` UInt64, + `_is_deleted` UInt8, + `_version` UInt64 +) +ENGINE = ReplacingMergeTree(_version, _is_deleted) +ORDER BY (account_id); + +INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); +-- Mark the first 100 rows as deleted. +INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); + +-- Put everything in one partition +OPTIMIZE TABLE t FINAL; + +SELECT count() FROM t; +SELECT count() FROM t FINAL; + +-- Both should produce the same number of rows. +-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, +-- as if no rows were deleted. +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02814_create_index_uniq_noop.reference b/tests/queries/0_stateless/02814_create_index_uniq_noop.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02814_create_index_uniq_noop.sql b/tests/queries/0_stateless/02814_create_index_uniq_noop.sql new file mode 100644 index 00000000000..127b3cbdeb6 --- /dev/null +++ b/tests/queries/0_stateless/02814_create_index_uniq_noop.sql @@ -0,0 +1,3 @@ +SET allow_create_index_without_type=1; +SET create_index_ignore_unique=1; +CREATE UNIQUE INDEX idx_tab2_0 ON tab2 (col1); diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference new file mode 100644 index 00000000000..7ff95106d3d --- /dev/null +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference @@ -0,0 +1,17 @@ +-- Based on https://github.com/ClickHouse/ClickHouse/issues/52436 +-- Test that inserts performed via Buffer table engine land into destination table. +-- { echoOn } + +DROP TABLE IF EXISTS null_table; +DROP TABLE IF EXISTS null_table_buffer; +DROP TABLE IF EXISTS null_mv; +DROP VIEW IF EXISTS number_view; +CREATE TABLE null_table (number UInt64) ENGINE = Null; +CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; +CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; +CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); +INSERT INTO null_table_buffer VALUES (1); +SELECT sleep(3) FORMAT Null; +-- Insert about should've landed into `null_mv` +SELECT count() FROM null_mv; +1 diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql new file mode 100644 index 00000000000..74b5cf5f432 --- /dev/null +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql @@ -0,0 +1,19 @@ +-- Based on https://github.com/ClickHouse/ClickHouse/issues/52436 +-- Test that inserts performed via Buffer table engine land into destination table. 
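+-- Note on the engine arguments used below, assuming the documented signature
+-- Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes):
+-- currentDatabase() is passed as the database argument, which is the point of this test,
+-- and max_time = 1 means the buffer is flushed to null_table within about a second,
+-- so the SELECT sleep(3) below leaves enough time for the flush (and the materialized
+-- view insert) to complete before null_mv is counted.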
+-- { echoOn } + +DROP TABLE IF EXISTS null_table; +DROP TABLE IF EXISTS null_table_buffer; +DROP TABLE IF EXISTS null_mv; +DROP VIEW IF EXISTS number_view; + +CREATE TABLE null_table (number UInt64) ENGINE = Null; +CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; +CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; + +CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); +INSERT INTO null_table_buffer VALUES (1); +SELECT sleep(3) FORMAT Null; + +-- Insert about should've landed into `null_mv` +SELECT count() FROM null_mv; diff --git a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.reference b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.reference new file mode 100644 index 00000000000..1e7d6b54cce --- /dev/null +++ b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.reference @@ -0,0 +1,5 @@ +Aborted +1 +1 +1 +2 diff --git a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh new file mode 100755 index 00000000000..a5c6de3ce58 --- /dev/null +++ b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION=1 + +# The environment variable works as expected: +bash -c " + abort_handler() + { + exit 0 + } + trap 'abort_handler' ABRT + $CLICKHOUSE_LOCAL --query 'this is wrong' +" 2>&1 | grep -o 'Aborted' + +# No exceptions are thrown in simple cases: +$CLICKHOUSE_LOCAL --query "SELECT 1" +$CLICKHOUSE_LOCAL --query "SHOW TABLES" +$CLICKHOUSE_LOCAL --query "SELECT * FROM system.tables WHERE database = currentDatabase() FORMAT Null" + +# The same for the client app: +$CLICKHOUSE_CLIENT --query "SELECT 1" +$CLICKHOUSE_CLIENT --query "SHOW TABLES" +$CLICKHOUSE_CLIENT --query "SELECT * FROM system.tables WHERE database = currentDatabase() FORMAT Null" + +# Multi queries are ok: +$CLICKHOUSE_LOCAL --multiquery "SELECT 1; SELECT 2;" + +# It can run in interactive mode: +function run() +{ + command=$1 + expect << EOF + +log_user 0 +set timeout 60 +match_max 100000 + +spawn bash -c "$command" + +expect ":) " + +send -- "SELECT 1\r" +expect "1" +expect ":) " + +send -- "exit\r" +expect eof + +EOF +} + +run "$CLICKHOUSE_LOCAL" diff --git a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference new file mode 100644 index 00000000000..8f2c820522c --- /dev/null +++ b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference @@ -0,0 +1,61 @@ +Test 1: check double quotes +1 abc 123 abacaba +2 def 456 bacabaa +3 story 78912 acabaab +4 history 21321321 cabaaba +Test 1a: check double quotes no parsing overflow +1 +Test 1b: check double quotes empty +1 +Test 2: check back quotes +1 abc 123 abacaba +2 def 456 bacabaa +3 story 78912 acabaab +4 history 21321321 cabaaba +Test 2a: check back quotes no parsing overflow +1 +Test 2b: check back quotes empty +1 +Test 3: check literal +1 abc 123 abacaba +2 def 456 bacabaa +3 story 78912 acabaab +4 history 21321321 cabaaba +Test 3a: check literal no parsing overflow +1 +Test 3b: check literal empty +1 +Test 4: select using * wildcard +30 +30 +30 +30 +30 +10 +30 +10 +Test 4b: 
select using ? wildcard +20 +10 +20 +10 +20 +Test 4c: select using '{' + '}' wildcards +20 +20 +1 +Test 4d: select using ? and * wildcards +30 +30 +30 +1 +30 +30 +Test 4e: select using ?, * and '{' + '}' wildcards +10 +20 +20 +20 +Test 4f: recursive search +2 +1 diff --git a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh new file mode 100755 index 00000000000..40b936481e7 --- /dev/null +++ b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +dir=${CLICKHOUSE_TEST_UNIQUE_NAME} +[[ -d $dir ]] && rm -rd $dir +mkdir $dir +mkdir $dir/nested +mkdir $dir/nested/nested + +# Create temporary csv file for tests +echo '"id","str","int","text"' > $dir/tmp.csv +echo '1,"abc",123,"abacaba"' >> $dir/tmp.csv +echo '2,"def",456,"bacabaa"' >> $dir/tmp.csv +echo '3,"story",78912,"acabaab"' >> $dir/tmp.csv +echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv + +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_1.jsonl') select * from numbers(1, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_2.jsonl') select * from numbers(11, 10)" + +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_30.jsonl') select * from numbers(21, 10)" + +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/nested/nested_numbers.jsonl') select * from numbers(1)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/nested/nested/nested_nested_numbers.jsonl') select * from numbers(1)" + +################# +echo "Test 1: check double quotes" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \"${dir}/tmp.csv\"" +################# +echo "Test 1a: check double quotes no parsing overflow" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \"${dir}/tmp.csv\"\"bad\"" 2>&1 | grep -c "UNKNOWN_TABLE" +################# +echo "Test 1b: check double quotes empty" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \"\"" 2>&1 | grep -c "SYNTAX_ERROR" +################# +echo "Test 2: check back quotes" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \`${dir}/tmp.csv\`" +################# +echo "Test 2a: check back quotes no parsing overflow" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \`${dir}/tmp.csv\`\`bad\`" 2>&1 | grep -c "UNKNOWN_TABLE" +################# +echo "Test 2b: check back quotes empty" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \`\`" 2>&1 | grep -c "SYNTAX_ERROR" +################# +echo "Test 3: check literal" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM '${dir}/tmp.csv'" +################# +echo "Test 3a: check literal no parsing overflow" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM '${dir}/tmp.csv''bad'" 2>&1 | grep -c "SYNTAX_ERROR" +################# +echo "Test 3b: check literal empty" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM ''" 2>&1 | grep -c "SYNTAX_ERROR" + +echo "Test 4: select using * wildcard" +# Extension is required for auto table structure detection +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/**.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/**********************.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*_numbers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*_nu*ers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*_nu*ers_2.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM 
'$dir/*tmp_numbers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*tmp_numbers_1*.jsonl'" + +echo "Test 4b: select using ? wildcard" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_?.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_??.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/??p_numbers??.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_n?mbers_1.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/t?p_n?mbers_?.jsonl'" + +echo "Test 4c: select using '{' + '}' wildcards" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_{1..3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_{1,2}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers__{1,2}.jsonl'" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" + +echo "Test 4d: select using ? and * wildcards" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*????.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*?***_.jsonl'" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*????_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?*_num*e?s_*.jsonl'" + +echo "Test 4e: select using ?, * and '{' + '}' wildcards" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?{1,3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?{1..3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?*_num*e?s_{1..3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?*_num*e?s_{1,2}.jsonl'" + +echo "Test 4f: recursive search" +# /**/* pattern does not look in current directory +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/**/*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/nested/**/*.jsonl'" + + +# Remove temporary dir with files +rm -rd $dir diff --git a/tests/queries/0_stateless/02816_has_token_empty.reference b/tests/queries/0_stateless/02816_has_token_empty.reference index aa47d0d46d4..8435d77c5fe 100644 --- a/tests/queries/0_stateless/02816_has_token_empty.reference +++ b/tests/queries/0_stateless/02816_has_token_empty.reference @@ -1,2 +1,6 @@ 0 +\N +\N 0 +\N +\N diff --git a/tests/queries/0_stateless/02816_has_token_empty.sql b/tests/queries/0_stateless/02816_has_token_empty.sql index e5d6156debd..3e00959126b 100644 --- a/tests/queries/0_stateless/02816_has_token_empty.sql +++ b/tests/queries/0_stateless/02816_has_token_empty.sql @@ -2,6 +2,10 @@ SELECT hasTokenCaseInsensitive('K(G', ''); -- { serverError BAD_ARGUMENTS } SELECT hasTokenCaseInsensitive('Hello', ''); -- { serverError BAD_ARGUMENTS } SELECT hasTokenCaseInsensitive('', ''); -- { serverError BAD_ARGUMENTS } SELECT hasTokenCaseInsensitive('', 'Hello'); +SELECT hasTokenCaseInsensitiveOrNull('Hello', ''); +SELECT hasTokenCaseInsensitiveOrNull('', ''); SELECT hasToken('Hello', ''); -- { serverError BAD_ARGUMENTS } SELECT hasToken('', 'Hello'); SELECT hasToken('', ''); -- { serverError BAD_ARGUMENTS } +SELECT hasTokenOrNull('', ''); +SELECT hasTokenOrNull('Hello', ''); diff --git a/tests/queries/0_stateless/02817_structure_to_schema.reference b/tests/queries/0_stateless/02817_structure_to_schema.reference new file mode 100644 index 00000000000..1f39a8ed50e --- /dev/null +++ b/tests/queries/0_stateless/02817_structure_to_schema.reference @@ -0,0 +1,466 @@ +CapnProto +Numbers + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + 
uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + int128 @8 : Data; + uint128 @9 : Data; + int256 @10 : Data; + uint256 @11 : Data; + float32 @12 : Float32; + float64 @13 : Float64; + decimal32 @14 : Int32; + decimal64 @15 : Int64; + decimal128 @16 : Data; + decimal256 @17 : Data; +} +Dates + +struct Message +{ + data @0 : UInt16; + date32 @1 : Int32; + datetime @2 : UInt32; + datatime64 @3 : Int64; +} +Strings + +struct Message +{ + string @0 : Data; + fixedstring @1 : Data; +} +Special + +struct Message +{ + ipv4 @0 : UInt32; + ipv6 @1 : Data; + uuid @2 : Data; +} +Nullable + +struct Message +{ + struct Nullable + { + union + { + value @0 : UInt32; + null @1 : Void; + } + } + nullable @0 : Nullable; +} +Enums + +struct Message +{ + enum Enum8 + { + v1 @0; + v2 @1; + v3 @2; + v4 @3; + } + enum8 @0 : Enum8; + enum Enum16 + { + v5 @0; + v6 @1; + v7 @2; + v8 @3; + v9 @4; + } + enum16 @1 : Enum16; +} +Arrays + +struct Message +{ + arr1 @0 : List(UInt32); + arr2 @1 : List(List(List(UInt32))); +} +Tuples + +struct Message +{ + struct Tuple1 + { + e1 @0 : UInt32; + e2 @1 : Data; + e3 @2 : UInt32; + } + tuple1 @0 : Tuple1; + struct Tuple2 + { + struct E1 + { + e1 @0 : UInt32; + struct E2 + { + e1 @0 : Data; + e2 @1 : UInt32; + } + e2 @1 : E2; + e3 @2 : Data; + } + e1 @0 : E1; + struct E2 + { + e1 @0 : Data; + e2 @1 : UInt32; + } + e2 @1 : E2; + } + tuple2 @1 : Tuple2; +} +Maps + +struct Message +{ + struct Map1 + { + struct Entry + { + key @0 : Data; + value @1 : UInt32; + } + entries @0 : List(Entry); + } + map1 @0 : Map1; + struct Map2 + { + struct Entry + { + struct Value + { + struct Entry + { + struct Value + { + struct Entry + { + key @0 : Data; + value @1 : UInt32; + } + entries @0 : List(Entry); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + map2 @1 : Map2; +} +Complex + +struct Message +{ + struct C1 + { + struct E1 + { + struct Entry + { + struct Value + { + union + { + value @0 : UInt32; + null @1 : Void; + } + } + key @0 : Data; + value @1 : List(List(Value)); + } + entries @0 : List(Entry); + } + e1 @0 : List(E1); + struct E2 + { + struct Entry + { + struct Value + { + struct E1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + e1 @0 : List(List(E1)); + struct E2 + { + e1 @0 : UInt32; + struct E2 + { + struct E1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + e1 @0 : List(List(E1)); + e2 @1 : UInt32; + } + e2 @1 : E2; + } + e2 @1 : List(E2); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + e2 @1 : List(E2); + } + c1 @0 : C1; +} +Read/write with no schema +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +Output schema + +struct Message +{ + number @0 : UInt64; +} +Bad output schema path +2 +2 +Protobuf +Numbers + +message Message +{ + int32 int8 = 1; + uint32 uint8 = 2; + int32 int16 = 3; + uint32 uint16 = 4; + int32 int32 = 5; + uint32 uint32 = 6; + int64 int64 = 7; + uint64 uint64 = 8; + bytes int128 = 9; + bytes uint128 = 10; + bytes int256 = 11; + bytes uint256 = 12; + float float32 = 13; + double float64 = 14; + bytes decimal32 = 15; + bytes decimal64 = 16; + bytes decimal128 = 17; + bytes decimal256 = 18; +} +Dates + +message Message +{ + uint32 data = 1; + int32 date32 = 2; + uint32 datetime = 3; + uint64 datatime64 = 4; +} +Strings + +message Message +{ + bytes string = 1; + bytes fixedstring = 2; +} +Special + +message Message +{ + uint32 ipv4 = 1; + bytes ipv6 = 2; + bytes uuid 
= 3; +} +Nullable + +message Message +{ + uint32 nullable = 1; +} +Enums + +message Message +{ + enum Enum8 + { + v1 = 0; + v2 = 1; + v3 = 2; + v4 = 3; + } + Enum8 enum8 = 1; + enum Enum16 + { + v5 = 0; + v6 = 1; + v7 = 2; + v8 = 3; + v9 = 4; + } + Enum16 enum16 = 2; +} +Arrays + +message Message +{ + repeated uint32 arr1 = 1; + message Arr2 + { + message Arr2 + { + repeated uint32 arr2 = 1; + } + repeated Arr2 arr2 = 1; + } + repeated Arr2 arr2 = 2; +} +Tuples + +message Message +{ + message Tuple1 + { + uint32 e1 = 1; + bytes e2 = 2; + uint32 e3 = 3; + } + Tuple1 tuple1 = 1; + message Tuple2 + { + message E1 + { + uint32 e1 = 1; + message E2 + { + bytes e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + bytes e3 = 3; + } + E1 e1 = 1; + message E2 + { + bytes e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + } + Tuple2 tuple2 = 2; +} +Maps + +message Message +{ + map map1 = 1; + message Map2Value + { + message Map2ValueValue + { + map map2ValueValue = 1; + } + map map2Value = 1; + } + map map2 = 2; +} +Complex + +message Message +{ + message C1 + { + message E1 + { + message E1Value + { + message E1Value + { + repeated uint32 e1Value = 1; + } + repeated E1Value e1Value = 1; + } + map e1 = 1; + } + repeated E1 e1 = 1; + message E2 + { + message E2Value + { + message E1 + { + repeated bytes e1 = 1; + } + repeated E1 e1 = 1; + message E2 + { + uint32 e1 = 1; + message E2 + { + message E1 + { + repeated bytes e1 = 1; + } + repeated E1 e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + } + repeated E2 e2 = 2; + } + map e2 = 1; + } + repeated E2 e2 = 2; + } + C1 c1 = 1; +} +Read/write with no schema +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +Output schema + +message Message +{ + uint64 number = 1; +} +Bad output schema path +2 +2 diff --git a/tests/queries/0_stateless/02817_structure_to_schema.sh b/tests/queries/0_stateless/02817_structure_to_schema.sh new file mode 100755 index 00000000000..3b79fa099a8 --- /dev/null +++ b/tests/queries/0_stateless/02817_structure_to_schema.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-data +SCHEMA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-schema + +function test_structure() +{ + format=$1 + ext=$2 + structure=$3 + + $CLICKHOUSE_LOCAL -q "select structureTo${format}Schema('$structure') format TSVRaw" > $SCHEMA_FILE.$ext + tail -n +2 $SCHEMA_FILE.$ext + + $CLICKHOUSE_LOCAL -q "select * from generateRandom('$structure', 42) limit 10 format $format settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, '$structure') format Null settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" + +} + +function test_format() +{ + format=$1 + ext=$2 + + echo $format + + echo Numbers + numbers='int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256, float32 Float32, float64 Float64, decimal32 Decimal32(3), decimal64 Decimal64(10), decimal128 Decimal128(20), decimal256 Decimal256(40)' + test_structure $format $ext "$numbers" + + echo Dates + dates='data Date, date32 Date32, datetime DateTime, datatime64 DateTime64(9)' + test_structure $format $ext "$dates" + + echo Strings + strings='string String, fixedstring FixedString(42)' + test_structure $format $ext "$strings" + + echo Special + special='ipv4 IPv4, ipv6 IPv6, uuid UUID' + test_structure $format $ext "$special" + + echo Nullable + nullable='nullable Nullable(UInt32)' + test_structure $format $ext "$nullable" + + echo Enums + enums="enum8 Enum8(''v1'' = -100, ''v2'' = -10, ''v3'' = 0, ''v4'' = 42), enum16 Enum16(''v5'' = -2000, ''v6'' = -1000, ''v7'' = 0, ''v8'' = 1000, ''v9'' = 2000)" + test_structure $format $ext "$enums" + + echo Arrays + arrays='arr1 Array(UInt32), arr2 Array(Array(Array(UInt32)))' + test_structure $format $ext "$arrays" + + echo Tuples + tuples='tuple1 Tuple(e1 UInt32, e2 String, e3 DateTime), tuple2 Tuple(e1 Tuple(e1 UInt32, e2 Tuple(e1 String, e2 DateTime), e3 String), e2 Tuple(e1 String, e2 UInt32))' + test_structure $format $ext "$tuples" + + echo Maps + maps='map1 Map(String, UInt32), map2 Map(String, Map(String, Map(String, UInt32)))' + test_structure $format $ext "$maps" + + echo Complex + complex='c1 Array(Tuple(e1 Map(String, Array(Array(Nullable(UInt32)))), e2 Map(String, Tuple(e1 Array(Array(Nullable(String))), e2 Nested(e1 UInt32, e2 Tuple(e1 Array(Array(Nullable(String))), e2 UInt32))))))' + test_structure $format $ext "$complex" + + echo "Read/write with no schema" + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, 'number UInt64')" + + echo "Output schema" + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format settings output_format_schema='$SCHEMA_FILE.$ext'" > $DATA_FILE + tail -n +2 $SCHEMA_FILE.$ext + + echo "Bad output schema path" + $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='/tmp/schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c + $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='../../schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c +} + +test_format CapnProto capnp +test_format Protobuf proto + +rm $DATA_FILE +rm $SCHEMA_FILE* + diff --git 
a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh new file mode 100755 index 00000000000..b1fbea26da7 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# requires TraceCollector, does not available under sanitizers and aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" +${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + +# at least something allocated +${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" + +# show wrong allocations +${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference new file mode 100644 index 00000000000..004d27bacad --- /dev/null +++ b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference @@ -0,0 +1,2 @@ +3 2 +3 2 3 diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql new file mode 100644 index 00000000000..d56d9c4e181 --- /dev/null +++ b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql @@ -0,0 +1,16 @@ +create view test_param_view as +with {param_test_val:UInt8} as param_test_val +select param_test_val, + arrayCount((a)->(a < param_test_val), t.arr) as cnt1 +from (select [1,2,3,4,5] as arr) t; + +select * from test_param_view(param_test_val = 3); + +create view test_param_view2 as +with {param_test_val:UInt8} as param_test_val +select param_test_val, + arrayCount((a)->(a < param_test_val), t.arr) as cnt1, + arrayCount((a)->(a < param_test_val+1), t.arr) as cnt2 +from (select [1,2,3,4,5] as arr) t; + +select * from test_param_view2(param_test_val = 3); \ No newline at end of file diff --git a/tests/queries/0_stateless/02828_create_as_table_function_rename.reference b/tests/queries/0_stateless/02828_create_as_table_function_rename.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02828_create_as_table_function_rename.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02828_create_as_table_function_rename.sql 
b/tests/queries/0_stateless/02828_create_as_table_function_rename.sql new file mode 100644 index 00000000000..7e24e485fb9 --- /dev/null +++ b/tests/queries/0_stateless/02828_create_as_table_function_rename.sql @@ -0,0 +1,7 @@ + +drop table if exists t1; +create table t1 as remote('localhost', 'system.one'); +rename table t1 to t2; +select * from t2; +rename table t2 to t1; +drop table t1; diff --git a/tests/queries/0_stateless/02832_alter_delete_indexes_projections.reference b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.reference new file mode 100644 index 00000000000..f14acdf9e6d --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.reference @@ -0,0 +1,6 @@ +2 +0 +3355402240 +3355402240 +3321851904 +3321851904 diff --git a/tests/queries/0_stateless/02832_alter_delete_indexes_projections.sql b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.sql new file mode 100644 index 00000000000..399d0fba564 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.sql @@ -0,0 +1,26 @@ +set mutations_sync = 2; + +drop table if exists t_delete_skip_index; + +create table t_delete_skip_index (x UInt32, y String, index i y type minmax granularity 3) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into t_delete_skip_index select number, toString(number) from numbers(8192 * 10); + +select count() from t_delete_skip_index where y in (4, 5); +alter table t_delete_skip_index delete where x < 8192; +select count() from t_delete_skip_index where y in (4, 5); + +drop table if exists t_delete_skip_index; +drop table if exists t_delete_projection; + +create table t_delete_projection (x UInt32, y UInt64, projection p (select sum(y))) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into t_delete_projection select number, toString(number) from numbers(8192 * 10); + +select sum(y) from t_delete_projection settings optimize_use_projections = 0; +select sum(y) from t_delete_projection settings optimize_use_projections = 0, force_optimize_projection = 1; + +alter table t_delete_projection delete where x < 8192; + +select sum(y) from t_delete_projection settings optimize_use_projections = 0; +select sum(y) from t_delete_projection settings optimize_use_projections = 0, force_optimize_projection = 1; + +drop table if exists t_delete_projection; diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference new file mode 100644 index 00000000000..f80f8738ff8 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference @@ -0,0 +1,12 @@ +test_alter_profile case: max_session_count 1 alter_sessions_count 1 +test_alter_profile case: max_session_count 2 alter_sessions_count 1 +USER_SESSION_LIMIT_EXCEEDED +test_alter_profile case: max_session_count 1 alter_sessions_count 2 +test_alter_profile case: max_session_count 2 alter_sessions_count 2 +READONLY +READONLY +READONLY +READONLY +READONLY +READONLY +READONLY diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh new file mode 100755 index 00000000000..546c54a4de9 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck 
source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +PROFILE="02832_alter_max_sessions_profile_$$" +USER="02832_alter_max_sessions_user_$$" +USER2="02832_alter_max_sessions_user_two_$$" +ROLE="02832_alter_max_sessions_role_$$" + +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER}'" +${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" + +function test_alter_profile() +{ + local max_session_count="$1" + local alter_sessions_count="$2" + echo $"test_alter_profile case: max_session_count ${max_session_count} alter_sessions_count ${alter_sessions_count}" + + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" + + # Create sessions with the $max_session_count restriction + for ((i = 1 ; i <= ${max_session_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + # Skip output from this query + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null + done + + # Update the restriction to $alter_sessions_count + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" + + # Simultaneous sessions should use max settings from profile ($alter_sessions_count) + for ((i = 1 ; i <= ${max_session_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + # Ignore the SELECT result, we only need errors + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + wait +} + +test_alter_profile 1 1 +test_alter_profile 2 1 +test_alter_profile 1 2 +test_alter_profile 2 2 + +${CLICKHOUSE_CLIENT} -q "SELECT 1 SETTINGS max_sessions_for_user = 1" 2>&1 | grep -m 1 -o 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"SET max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} --max_sessions_for_user=1 -q $"SELECT 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +# max_sessions_for_user is a profile setting +${CLICKHOUSE_CLIENT} -q $"CREATE USER ${USER2} SETTINGS max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"ALTER USER ${USER} SETTINGS max_sessions_for_user = 1" 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"CREATE ROLE ${ROLE} SETTINGS max_sessions_for_user = 1" 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"CREATE ROLE ${ROLE}" +${CLICKHOUSE_CLIENT} -q $"ALTER ROLE ${ROLE} SETTINGS max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 + +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER}'" +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER2}'" +${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"DROP ROLE IF EXISTS ${ROLE}" diff --git a/tests/queries/0_stateless/02832_integer_type_inference.reference b/tests/queries/0_stateless/02832_integer_type_inference.reference new file mode 100644 index 00000000000..5a01bd4cd11 --- /dev/null +++ b/tests/queries/0_stateless/02832_integer_type_inference.reference @@ -0,0 +1,2 @@ +[-4741124612489978151,-3236599669630092879,5607475129431807682] +[100,-100,5607475129431807682,5607475129431807683] diff --git 
a/tests/queries/0_stateless/02832_integer_type_inference.sql b/tests/queries/0_stateless/02832_integer_type_inference.sql new file mode 100644 index 00000000000..221e929d705 --- /dev/null +++ b/tests/queries/0_stateless/02832_integer_type_inference.sql @@ -0,0 +1,2 @@ +select [-4741124612489978151, -3236599669630092879, 5607475129431807682]; +select [100, -100, 5607475129431807682, 5607475129431807683]; diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference new file mode 100644 index 00000000000..ea545c90391 --- /dev/null +++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference @@ -0,0 +1,3 @@ +test + +\N diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql new file mode 100644 index 00000000000..0e58c716c9f --- /dev/null +++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql @@ -0,0 +1,3 @@ +SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); +SELECT transform(name, ['test', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); +SELECT transform(name, ['a', 'test'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); diff --git a/tests/queries/0_stateless/02833_array_join_columns.reference b/tests/queries/0_stateless/02833_array_join_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02833_array_join_columns.sql b/tests/queries/0_stateless/02833_array_join_columns.sql new file mode 100644 index 00000000000..3f9a33a3959 --- /dev/null +++ b/tests/queries/0_stateless/02833_array_join_columns.sql @@ -0,0 +1,19 @@ +drop table if exists test_array_joins; +drop table if exists v4test_array_joins; + +create table test_array_joins +( + id UInt64 default rowNumberInAllBlocks() + 1, + arr_1 Array(String), + arr_2 Array(String), + arr_3 Array(String), + arr_4 Array(String) +) engine = MergeTree order by id; + +insert into test_array_joins (id,arr_1, arr_2, arr_3, arr_4) +SELECT number,array(randomPrintableASCII(3)),array(randomPrintableASCII(3)),array(randomPrintableASCII(3)),array(randomPrintableASCII(3)) +from numbers(1000); + +create view v4test_array_joins as SELECT * from test_array_joins where id != 10; + +select * from v4test_array_joins array join columns('^arr') where match(arr_4,'a') and id < 100 order by id format Null settings optimize_read_in_order = 0; diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.reference b/tests/queries/0_stateless/02833_concurrrent_sessions.reference new file mode 100644 index 00000000000..bfe507e8eac --- /dev/null +++ b/tests/queries/0_stateless/02833_concurrrent_sessions.reference @@ -0,0 +1,34 @@ +sessions: +150 +port_0_sessions: +0 +address_0_sessions: +0 +tcp_sessions +60 +http_sessions +30 +http_with_session_id_sessions +30 +my_sql_sessions +30 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.sh b/tests/queries/0_stateless/02833_concurrrent_sessions.sh new file mode 100755 index 00000000000..c5b6204529b --- /dev/null +++ 
b/tests/queries/0_stateless/02833_concurrrent_sessions.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ + +# Each user uses a separate thread. +readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users +readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" ) +readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" ) +readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}") +readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" ) + +TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly TCP_USERS_SQL_COLLECTION_STRING +HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly HTTP_USERS_SQL_COLLECTION_STRING +HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING +MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly MYSQL_USERS_SQL_COLLECTION_STRING +ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly ALL_USERS_SQL_COLLECTION_STRING + +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +for user in "${ALL_USERS[@]}"; do + ${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'" + ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}" + ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}"; +done + +# All _session functions execute in separate threads. +# Each of them creates a session with a successful login and logout, +# sleeps for a small, random amount of time to make concurrency more intense, +# and also tries to log in with an invalid password.
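+# Every function below runs 10 iterations; each iteration produces one successful login/logout pair and one
+# failed login, so each user is expected to leave 30 entries in system.session_log (see the .reference counts).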
+function tcp_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass" + # login failure + ${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid' + done +} + +function http_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})" + + # login failure + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4" + done +} + +function http_with_session_id_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep 0.01${RANDOM}" + + # login failure + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6" + done +} + +function mysql_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')" + + # login failure + ${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)" + done +} + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" + +export -f tcp_session; +export -f http_session; +export -f http_with_session_id_session; +export -f mysql_session; + +for user in "${TCP_USERS[@]}"; do + timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 & +done + +for user in "${HTTP_USERS[@]}"; do + timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 & +done + +for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do + timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 & +done + +for user in "${MYSQL_USERS[@]}"; do + timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 & +done + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" + +echo "port_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0" + +echo "address_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')" + +echo "tcp_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'" +echo "http_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" +echo "http_with_session_id_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" +echo "my_sql_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'" + +for user in "${ALL_USERS[@]}"; do + ${CLICKHOUSE_CLIENT} -q "DROP USER ${user}" + echo "Corresponding LoginSuccess/Logout" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM 
(SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')" + echo "LoginFailure" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'" + done diff --git a/tests/queries/0_stateless/02833_local_udf_options.reference b/tests/queries/0_stateless/02833_local_udf_options.reference new file mode 100644 index 00000000000..19f0805d8de --- /dev/null +++ b/tests/queries/0_stateless/02833_local_udf_options.reference @@ -0,0 +1 @@ +qwerty diff --git a/tests/queries/0_stateless/02833_local_udf_options.sh b/tests/queries/0_stateless/02833_local_udf_options.sh new file mode 100755 index 00000000000..149b62d7e2c --- /dev/null +++ b/tests/queries/0_stateless/02833_local_udf_options.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +SCRIPTS_DIR=$CUR_DIR/scripts_udf + +$CLICKHOUSE_LOCAL -q 'select test_function()' -- --user_scripts_path=$SCRIPTS_DIR --user_defined_executable_functions_config=$SCRIPTS_DIR/function.xml diff --git a/tests/queries/0_stateless/02833_local_with_dialect.reference b/tests/queries/0_stateless/02833_local_with_dialect.reference new file mode 100644 index 00000000000..dbb67375997 --- /dev/null +++ b/tests/queries/0_stateless/02833_local_with_dialect.reference @@ -0,0 +1,2 @@ +0 +[?2004h[?2004lBye. diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh new file mode 100755 index 00000000000..012a6d91269 --- /dev/null +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.reference b/tests/queries/0_stateless/02833_multiprewhere_extra_column.reference new file mode 100644 index 00000000000..45571c71477 --- /dev/null +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.reference @@ -0,0 +1,2 @@ +10496500 +4 diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql new file mode 100644 index 00000000000..3a751294cba --- /dev/null +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -0,0 +1,25 @@ +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage + +drop table if exists t_multi_prewhere; +drop row policy if exists policy_02834 on t_multi_prewhere; + +create table t_multi_prewhere (a UInt64, b UInt64, c UInt8) +engine = MergeTree order by tuple() +settings min_bytes_for_wide_part = 0; + +create row policy policy_02834 on t_multi_prewhere using a > 2000 as permissive to all; +insert into t_multi_prewhere select number, number, number from numbers(10000); + +system drop mark cache; +select sum(b) from t_multi_prewhere prewhere a < 5000; + +system flush logs; + +select ProfileEvents['FileOpen'] from system.query_log +where + type = 'QueryFinish' + and current_database = currentDatabase() + and query ilike '%select sum(b) from t_multi_prewhere prewhere a < 5000%'; + +drop table if exists t_multi_prewhere; +drop row policy if exists policy_02834 on t_multi_prewhere; diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python new file mode 100755 index 00000000000..39d81438c1b --- /dev/null +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from tcp_client import TCPClient + + +def run_query_without_errors(query, support_partial_result): + with TCPClient() as client: + client.sendQuery(query) + + # external tables + client.sendEmptyBlock() + client.readHeader() + + # Partial result + partial_result = client.readDataWithoutProgress()[0] + if support_partial_result: + assert ( + len(partial_result.value) > 0 + ), "Expected at least one block with a non-empty partial result before getting the full result" + + while True: + assert all( + a >= b + for a, b in zip(partial_result.value, partial_result.value[1:]) + ), "Partial result always should be sorted for this test" + + new_partial_result = client.readDataWithoutProgress( + need_print_info=False + )[0] + if len(new_partial_result.value) == 0: + break + + data_size = len(partial_result.value) + assert all( + partial_result.value[i] <= new_partial_result.value[i] + for i in range(data_size) + ), f"New partial result values should always be greater then old one because a new block contains more information about the full data. New result {new_partial_result}. 
Previous result {partial_result}" + + partial_result = new_partial_result + else: + block_rows = len(partial_result.value) + assert ( + block_rows == 0 + ), f"Expected only empty partial result block before getting the full result, but block has {block_rows} rows" + + # Full result + full_result = client.readDataWithoutProgress()[0] + + data_size = len(partial_result.value) + assert all( + partial_result.value[i] <= full_result.value[i] for i in range(data_size) + ), f"Full result values should always be greater than partial result values. Full result {full_result}. Partial result {partial_result}" + + for result in full_result.value: + print(result) + + +def main(): + rows_number = 2e7 + 1 + + # Request with partial result limit less than full limit + run_query_without_errors( + f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 5 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 3", + support_partial_result=True, + ) + + # Request with partial result limit greater than full limit + run_query_without_errors( + f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 5", + support_partial_result=True, + ) + + # Request with OFFSET + run_query_without_errors( + f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3 OFFSET 1 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 5", + support_partial_result=True, + ) + + # Request with OFFSET greater than partial result limit (the partial result pipeline uses blocks with fewer rows than OFFSET, so there will be no elements in the block after LimitPartialResultTransform) + run_query_without_errors( + f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3 OFFSET 15 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 5", + support_partial_result=False, + ) + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference new file mode 100644 index 00000000000..dd3a343560f --- /dev/null +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference @@ -0,0 +1,38 @@ +Rows 0 Columns 1 +Column number type UInt64 +Rows 3 Columns 1 +Column number type UInt64 +Rows 5 Columns 1 +Column number type UInt64 +20000000 +19999999 +19999998 +19999997 +19999996 +Rows 0 Columns 1 +Column number type UInt64 +Rows 3 Columns 1 +Column number type UInt64 +Rows 3 Columns 1 +Column number type UInt64 +20000000 +19999999 +19999998 +Rows 0 Columns 1 +Column number type UInt64 +Rows 3 Columns 1 +Column number type UInt64 +Rows 3 Columns 1 +Column number type UInt64 +19999999 +19999998 +19999997 +Rows 0 Columns 1 +Column number type UInt64 +Rows 0 Columns 1 +Column number type UInt64 +Rows 3 Columns 1 +Column number type UInt64 +19999985 +19999984 +19999983 diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh new file mode 100755 index 00000000000..1ed15197dbf --- /dev/null +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh 
+. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02833_partial_sorting_result_during_query_execution.python diff --git a/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference new file mode 100644 index 00000000000..85573e2ed49 --- /dev/null +++ b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference @@ -0,0 +1,4 @@ +(0,0) +(0,0) +(0,1) +(0,NULL) diff --git a/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql new file mode 100644 index 00000000000..776dd35ddba --- /dev/null +++ b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql @@ -0,0 +1,14 @@ +drop table if exists t_tuple_sparse; + +create table t_tuple_sparse (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.0; + +insert into t_tuple_sparse values (0, 0); + +select (a, b) from t_tuple_sparse; +select (a, 0) from t_tuple_sparse; +select (a, 1) from t_tuple_sparse; +select (a, NULL) from t_tuple_sparse; + +drop table if exists t_tuple_sparse; diff --git a/tests/queries/0_stateless/02833_starts_ends_with_utf8.reference b/tests/queries/0_stateless/02833_starts_ends_with_utf8.reference new file mode 100644 index 00000000000..ca2a5bc50f8 --- /dev/null +++ b/tests/queries/0_stateless/02833_starts_ends_with_utf8.reference @@ -0,0 +1,29 @@ +-- { echoOn } +select startsWithUTF8('富强民主文明和谐', '富强'); +1 +select startsWithUTF8('富强民主文明和谐', '\xe5'); +0 +select startsWithUTF8('富强民主文明和谐', ''); +1 +SELECT startsWithUTF8('123', '123'); +1 +SELECT startsWithUTF8('123', '12'); +1 +SELECT startsWithUTF8('123', '1234'); +0 +SELECT startsWithUTF8('123', ''); +1 +select endsWithUTF8('富强民主文明和谐', '和谐'); +1 +select endsWithUTF8('富强民主文明和谐', '\x90'); +0 +select endsWithUTF8('富强民主文明和谐', ''); +1 +SELECT endsWithUTF8('123', '3'); +1 +SELECT endsWithUTF8('123', '23'); +1 +SELECT endsWithUTF8('123', '32'); +0 +SELECT endsWithUTF8('123', ''); +1 diff --git a/tests/queries/0_stateless/02833_starts_ends_with_utf8.sql b/tests/queries/0_stateless/02833_starts_ends_with_utf8.sql new file mode 100644 index 00000000000..3a783dc280e --- /dev/null +++ b/tests/queries/0_stateless/02833_starts_ends_with_utf8.sql @@ -0,0 +1,19 @@ +-- { echoOn } +select startsWithUTF8('富强民主文明和谐', '富强'); +select startsWithUTF8('富强民主文明和谐', '\xe5'); +select startsWithUTF8('富强民主文明和谐', ''); + +SELECT startsWithUTF8('123', '123'); +SELECT startsWithUTF8('123', '12'); +SELECT startsWithUTF8('123', '1234'); +SELECT startsWithUTF8('123', ''); + +select endsWithUTF8('富强民主文明和谐', '和谐'); +select endsWithUTF8('富强民主文明和谐', '\x90'); +select endsWithUTF8('富强民主文明和谐', ''); + +SELECT endsWithUTF8('123', '3'); +SELECT endsWithUTF8('123', '23'); +SELECT endsWithUTF8('123', '32'); +SELECT endsWithUTF8('123', ''); +-- { echoOff } diff --git a/tests/queries/0_stateless/02833_std_alias.reference b/tests/queries/0_stateless/02833_std_alias.reference new file mode 100644 index 00000000000..d8e5e1556a7 --- /dev/null +++ b/tests/queries/0_stateless/02833_std_alias.reference @@ -0,0 +1,2 @@ +6.408619196051518 5.848925577403085 +6.408619196051518 5.848925577403085 diff --git a/tests/queries/0_stateless/02833_std_alias.sql b/tests/queries/0_stateless/02833_std_alias.sql new file mode 100644 index 00000000000..256990f3f3a --- /dev/null +++ b/tests/queries/0_stateless/02833_std_alias.sql @@ 
-0,0 +1,8 @@ +DROP TABLE IF EXISTS series; +CREATE TABLE series(i UInt32, x Float64, y Float64) ENGINE = Memory; +INSERT INTO series(i, x, y) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); + +SELECT std(x), std(y) FROM series; +SELECT stddevPop(x), stddevPop(y) FROM series; + +DROP TABLE series; diff --git a/tests/queries/0_stateless/02833_tuple_concat.reference b/tests/queries/0_stateless/02833_tuple_concat.reference new file mode 100644 index 00000000000..2c865f13ffc --- /dev/null +++ b/tests/queries/0_stateless/02833_tuple_concat.reference @@ -0,0 +1,6 @@ +(1,'y',2,'n') +(1,'y',2,'n',3,'n') +(1,2,3,'a','b','c','2020-10-08','2020-11-08') 1 2 3 a b c 2020-10-08 2020-11-08 +(1,2,1,2) 1 2 1 2 +(1,2,3,4) 1 2 3 4 +(3,4,1,2) 3 4 1 2 diff --git a/tests/queries/0_stateless/02833_tuple_concat.sql b/tests/queries/0_stateless/02833_tuple_concat.sql new file mode 100644 index 00000000000..df43e08d595 --- /dev/null +++ b/tests/queries/0_stateless/02833_tuple_concat.sql @@ -0,0 +1,23 @@ +SELECT tupleConcat(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleConcat((1, 'y'), 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT tupleConcat((1, 'y'), (2, 'n')); +SELECT tupleConcat((1, 'y'), (2, 'n'), (3, 'n')); + +WITH (1,2,3) || ('a','b','c') || ('2020-10-08'::Date, '2020-11-08'::Date) AS t +SELECT t, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8; + +DROP TABLE IF EXISTS t_02833; +CREATE TABLE t_02833 (tup Tuple(a UInt64, b UInt64)) ENGINE=Log; +INSERT INTO t_02833 VALUES ((1, 2)); + +WITH (tup || tup) AS res +SELECT res, res.1, res.2, res.3, res.4 FROM t_02833; + +WITH (tup || (3, 4)) AS res +SELECT res, res.1, res.2, res.3, res.4 FROM t_02833; + +WITH ((3, 4) || tup) AS res +SELECT res, res.1, res.2, res.3, res.4 FROM t_02833; + +DROP TABLE t_02833; diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.reference b/tests/queries/0_stateless/02833_url_without_path_encoding.reference new file mode 100644 index 00000000000..d5626230d71 --- /dev/null +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.reference @@ -0,0 +1,2 @@ +4 +test%2Fa.tsv diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.sh b/tests/queries/0_stateless/02833_url_without_path_encoding.sh new file mode 100755 index 00000000000..b71586099cf --- /dev/null +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=1" + +# Grep 'test%2Fa.tsv' to ensure that path wasn't encoded/decoded +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | grep -o "test%2Fa.tsv" -m1 + diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.reference b/tests/queries/0_stateless/02833_window_func_range_offset.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.sql b/tests/queries/0_stateless/02833_window_func_range_offset.sql new file mode 100644 index 00000000000..f1d26c5cbaf --- /dev/null +++ b/tests/queries/0_stateless/02833_window_func_range_offset.sql @@ -0,0 +1,6 @@ +-- invalid start offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +-- invalid end offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02834_alter_exception.reference b/tests/queries/0_stateless/02834_alter_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02834_alter_exception.sql b/tests/queries/0_stateless/02834_alter_exception.sql new file mode 100644 index 00000000000..d42f40fcbf7 --- /dev/null +++ b/tests/queries/0_stateless/02834_alter_exception.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS alter_02834; +CREATE TABLE alter_02834 (a UInt64) ENGINE=MergeTree() ORDER BY a; +ALTER TABLE alter_02834 MODIFY QUERY SELECT a FROM alter_02834; -- { serverError NOT_IMPLEMENTED } +DROP TABLE alter_02834; diff --git a/tests/queries/0_stateless/02834_analyzer_with_statement_references.reference b/tests/queries/0_stateless/02834_analyzer_with_statement_references.reference new file mode 100644 index 00000000000..19df0a2d8ae --- /dev/null +++ b/tests/queries/0_stateless/02834_analyzer_with_statement_references.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 diff --git a/tests/queries/0_stateless/02834_analyzer_with_statement_references.sql b/tests/queries/0_stateless/02834_analyzer_with_statement_references.sql new file mode 100644 index 00000000000..6254c054eec --- /dev/null +++ b/tests/queries/0_stateless/02834_analyzer_with_statement_references.sql @@ -0,0 +1,7 @@ +SET allow_experimental_analyzer = 1; + +WITH test_aliases AS (SELECT number FROM numbers(20)), alias2 AS (SELECT number FROM test_aliases) +SELECT number FROM alias2 SETTINGS enable_global_with_statement = 1; + +WITH test_aliases AS (SELECT number FROM numbers(20)), alias2 AS (SELECT number FROM test_aliases) +SELECT number FROM alias2 SETTINGS enable_global_with_statement = 0; -- { serverError 60 } diff --git a/tests/queries/0_stateless/02834_array_exists_segfault.reference b/tests/queries/0_stateless/02834_array_exists_segfault.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02834_array_exists_segfault.sql 
b/tests/queries/0_stateless/02834_array_exists_segfault.sql new file mode 100644 index 00000000000..3cf457610fa --- /dev/null +++ b/tests/queries/0_stateless/02834_array_exists_segfault.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS 02834_t; +CREATE TABLE 02834_t (id UInt64, arr Array(UInt64)) ENGINE = MergeTree ORDER BY id; +WITH subquery AS (SELECT []) SELECT t.* FROM 02834_t AS t JOIN subquery ON arrayExists(x -> x = 1, t.arr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +DROP TABLE 02834_t; diff --git a/tests/queries/0_stateless/02834_client_yaml_configs.reference b/tests/queries/0_stateless/02834_client_yaml_configs.reference new file mode 100644 index 00000000000..b2eddb19e52 --- /dev/null +++ b/tests/queries/0_stateless/02834_client_yaml_configs.reference @@ -0,0 +1,3 @@ +31337 +31338 +31339 diff --git a/tests/queries/0_stateless/02834_client_yaml_configs.sh b/tests/queries/0_stateless/02834_client_yaml_configs.sh new file mode 100755 index 00000000000..66d3df8829e --- /dev/null +++ b/tests/queries/0_stateless/02834_client_yaml_configs.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +pushd "${CLICKHOUSE_TMP}" > /dev/null || exit + +echo "max_block_size: 31337" > clickhouse-client.yaml +${CLICKHOUSE_CLIENT} --query "SELECT getSetting('max_block_size')" +rm clickhouse-client.yaml + +echo "max_block_size: 31338" > clickhouse-client.yml +${CLICKHOUSE_CLIENT} --query "SELECT getSetting('max_block_size')" +rm clickhouse-client.yml + +echo "31339" > clickhouse-client.xml +${CLICKHOUSE_CLIENT} --query "SELECT getSetting('max_block_size')" +rm clickhouse-client.xml + +popd > /dev/null || exit diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference new file mode 100644 index 00000000000..50173c150c0 --- /dev/null +++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference @@ -0,0 +1,76 @@ +CSV +1 1 +2 0 +0 0 +3 3 +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 +TSV +1 1 +2 0 +0 0 +3 3 +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 +JSONCompactEachRow +1 1 +2 0 +0 0 +3 3 +1 1 +2 0 +0 0 +3 3 +1 [1,2,3] +2 [] +0 [] +3 [3] +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 +CustomSeparated +1 1 +2 0 +0 0 +3 3 +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql new file mode 100644 index 00000000000..7c55cf2e9a7 --- /dev/null +++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql @@ -0,0 +1,24 @@ +select 'CSV'; +select * from format(CSV, 'x UInt32, y UInt32', '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select * from format(CSV, '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select * from format(CSVWithNames, '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select * from format(CSVWithNames, 'x UInt32, z UInt32', '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select 'TSV'; +select * 
from format(TSV, 'x UInt32, y UInt32', '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select * from format(TSV, '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select * from format(TSVWithNames, 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select * from format(TSVWithNames, 'x UInt32, z UInt32', 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select 'JSONCompactEachRow'; +select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1,[1,2,3]]\n[2]\n[]\n[3,3,3,3,[1,2,3]]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRow, 'x UInt32, y Array(UInt32)', '[1,[1,2,3],1]\n[2]\n[]\n[3,[3],3,3,[1,2,3]]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRow, '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRowWithNames, '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRowWithNames, 'x UInt32, z UInt32', '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select 'CustomSeparated'; +set format_custom_escaping_rule='CSV', format_custom_field_delimiter='', format_custom_row_before_delimiter='', format_custom_row_after_delimiter='', format_custom_row_between_delimiter='', format_custom_result_before_delimiter='', format_custom_result_after_delimiter=''; +select * from format(CustomSeparated, 'x UInt32, y UInt32', '1123333') settings input_format_custom_allow_variable_number_of_columns=1; +select * from format(CustomSeparated, '1123333') settings input_format_custom_allow_variable_number_of_columns=1; +select * from format(CustomSeparatedWithNames, '"x""y"1123333') settings input_format_custom_allow_variable_number_of_columns=1; +select * from format(CustomSeparatedWithNames, 'x UInt32, z UInt32', '"x""y"1123333') settings input_format_custom_allow_variable_number_of_columns=1; + diff --git a/tests/queries/0_stateless/02834_nulls_first_sort.reference b/tests/queries/0_stateless/02834_nulls_first_sort.reference new file mode 100644 index 00000000000..c16f69ac3c1 --- /dev/null +++ b/tests/queries/0_stateless/02834_nulls_first_sort.reference @@ -0,0 +1,5 @@ +5 \N 1 +5 \N 2 +5 \N 3 +5 \N 7 +5 1 1 diff --git a/tests/queries/0_stateless/02834_nulls_first_sort.sql b/tests/queries/0_stateless/02834_nulls_first_sort.sql new file mode 100644 index 00000000000..e17a49baf24 --- /dev/null +++ b/tests/queries/0_stateless/02834_nulls_first_sort.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS nulls_first_sort_test; +CREATE TABLE nulls_first_sort_test (a Nullable(Int32), b Nullable(Int32), c Nullable(Int32)) ENGINE = Memory; + +INSERT INTO nulls_first_sort_test VALUES (5,null,2), (5,null,1), (5,null,7), (5,null,3), (5,7,4), (5,7,6), (5,7,2), (5,7,1), (5,7,3), (5,7,9), (5,1,4), (5,1,6), (5,1,2), (5,1,1), (5,1,3), (5,1,9); + +SELECT * FROM nulls_first_sort_test ORDER BY a NULLS FIRST,b NULLS FIRST,c NULLS FIRST LIMIT 5; +DROP TABLE nulls_first_sort_test; diff --git 
a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python new file mode 100644 index 00000000000..aa9f80c751f --- /dev/null +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from tcp_client import TCPClient + + +def get_keys(results): + return [key for key, _ in results] + + +def check_new_result(new_results, old_results, invariants, rows_limit): + if rows_limit is not None: + assert ( + len(new_results[0].value) <= rows_limit + ), f"Result should have no more than {rows_limit} rows. But it has {len(new_results[0].value)} rows" + + for new_result, old_result in zip(new_results, old_results): + assert ( + new_result.key == old_result.key + ), f"Keys in blocks should be in the same order. New result keys {get_keys(new_results)}. Old result keys {get_keys(old_results)}" + + key = new_result.key + if key in invariants: + new_value = new_result.value + old_value = old_result.value + assert invariants[key]( + old_value, new_value + ), f"Problem with the invariant between new and old result for key: {key}. New value {new_value}. Old value {old_value}" + + +def run_query_without_errors( + query, support_partial_result, invariants=None, rows_limit=None +): + if invariants is None: + invariants = {} + + with TCPClient() as client: + client.sendQuery(query) + + # external tables + client.sendEmptyBlock() + client.readHeader() + + # Partial result + partial_results = client.readDataWithoutProgress() + if support_partial_result: + assert ( + len(partial_results) > 0 and len(partial_results[0].value) > 0 + ), "Expected at least one block with a non-empty partial result before getting the full result" + while True: + new_partial_results = client.readDataWithoutProgress( + need_print_info=False + ) + if len(new_partial_results[0].value) == 0: + break + + check_new_result( + new_partial_results, partial_results, invariants, rows_limit + ) + partial_results = new_partial_results + else: + block_rows = len(partial_results[0].value) + assert ( + block_rows == 0 + ), f"Expected only empty partial result block before getting the full result, but block has {block_rows} rows" + + # Full result + full_results = client.readDataWithoutProgress() + if support_partial_result: + check_new_result(full_results, partial_results, invariants, rows_limit) + + for data in full_results: + if isinstance(data.value[0], int): + print(data.key, data.value) + + +def supported_scenarios_without_key(): + rows_number = 2e7 + 1 + + # Simple aggregation query + query = f"select median(number), stddevSamp(number), stddevPop(number), max(number), min(number), any(number), count(number), avg(number), sum(number) from numbers_mt({rows_number}) settings max_threads = 1, partial_result_update_duration_ms = 1" + invariants = { + "median(number)": lambda old_value, new_value: old_value <= new_value, + "max(number)": lambda old_value, new_value: old_value <= new_value, + "min(number)": lambda old_value, new_value: old_value >= new_value, + "count(number)": lambda old_value, new_value: old_value <= new_value, + "avg(number)": lambda old_value, new_value: old_value <= new_value, + "sum(number)": lambda old_value, new_value: old_value <= new_value, + } + 
run_query_without_errors( + query, support_partial_result=True, invariants=invariants, rows_limit=1 + ) + + # Aggregation query with a nested ORDER BY subquery + query = f"select median(number), stddevSamp(number), stddevPop(number), max(number), min(number), any(number), count(number), avg(number), sum(number) FROM (SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3) settings max_threads = 1, partial_result_update_duration_ms=1" + + # Aggregation receives small partial result blocks from ORDER BY which always sends blocks with bigger values + invariants["min(number)"] = lambda old_value, new_value: old_value <= new_value + run_query_without_errors( + query, support_partial_result=True, invariants=invariants, rows_limit=1 + ) + + +def unsupported_scenarios(): + rows_number = 2e7 + 1 + + # Currently aggregator for partial result supports only single thread aggregation without key + # Update test when multithreading or aggregation with GROUP BY will be supported for partial result updates + multithread_query = f"select sum(number) from numbers_mt({rows_number}) settings max_threads = 2, partial_result_update_duration_ms = 100" + run_query_without_errors(multithread_query, support_partial_result=False) + + group_with_key_query = f"select mod2, sum(number) from numbers_mt({rows_number}) group by number % 2 as mod2 settings max_threads = 1, partial_result_update_duration_ms = 100" + run_query_without_errors(group_with_key_query, support_partial_result=False) + + +def main(): + supported_scenarios_without_key() + unsupported_scenarios() + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference new file mode 100644 index 00000000000..aea61fad42f --- /dev/null +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference @@ -0,0 +1,88 @@ +Rows 0 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +Rows 1 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +Rows 1 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +max(number) [20000000] +min(number) [0] +any(number) [0] +count(number) [20000001] +sum(number) [200000010000000] +Rows 0 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +Rows 1 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type 
Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +Rows 1 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +max(number) [20000000] +min(number) [19999998] +any(number) [20000000] +count(number) [3] +sum(number) [59999997] +Rows 0 Columns 1 +Column sum(number) type UInt64 +Rows 0 Columns 1 +Column sum(number) type UInt64 +Rows 1 Columns 1 +Column sum(number) type UInt64 +sum(number) [200000010000000] +Rows 0 Columns 2 +Column mod2 type UInt8 +Column sum(number) type UInt64 +Rows 0 Columns 2 +Column mod2 type UInt8 +Column sum(number) type UInt64 +Rows 2 Columns 2 +Column mod2 type UInt8 +Column sum(number) type UInt64 +mod2 [0, 1] +sum(number) [100000010000000, 100000000000000] diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh new file mode 100755 index 00000000000..e70a3c53ec4 --- /dev/null +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02834_partial_aggregating_result_during_query_execution.python diff --git a/tests/queries/0_stateless/02834_remote_session_log.reference b/tests/queries/0_stateless/02834_remote_session_log.reference new file mode 100644 index 00000000000..e2680982ab0 --- /dev/null +++ b/tests/queries/0_stateless/02834_remote_session_log.reference @@ -0,0 +1,13 @@ +0 +0 +0 +0 +client_port 0 connections: +0 +client_address '::' connections: +0 +login failures: +0 +TCP Login and logout count is equal +HTTP Login and logout count is equal +MySQL Login and logout count is equal diff --git a/tests/queries/0_stateless/02834_remote_session_log.sh b/tests/queries/0_stateless/02834_remote_session_log.sh new file mode 100755 index 00000000000..3bedfb6c9ee --- /dev/null +++ b/tests/queries/0_stateless/02834_remote_session_log.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +readonly PID=$$ +readonly TEST_USER=$"02834_USER_${PID}" +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'" + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \ + -d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \ + -d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "client_port 0 connections:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0" + +echo "client_address '::' connections:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')" + +echo "login failures:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'" + +# remote(...) function sometimes reuses old cached sessions for query execution. +# This makes LoginSuccess/Logout entries count unstable, but success and logouts must always match. + +for interface in 'TCP' 'HTTP' 'MySQL' +do + LOGIN_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'"` + CORRESPONDING_LOGOUT_RECORDS_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')"` + + if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then + echo "${interface} Login and logout count is equal" + else + TOTAL_LOGOUT_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'"` + echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. 
TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}" + fi +done + +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" diff --git a/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.reference b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.reference new file mode 100644 index 00000000000..e2ed8f4daf2 --- /dev/null +++ b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.reference @@ -0,0 +1 @@ +65536 diff --git a/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.sql b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.sql new file mode 100644 index 00000000000..32bd9694bd0 --- /dev/null +++ b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t_sparse_sort_limit; + +CREATE TABLE t_sparse_sort_limit (date Date, i UInt64, v Int16) +ENGINE = MergeTree ORDER BY (date, i) +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_sort_limit SELECT '2020-10-10', number % 10, number FROM numbers(100000); +INSERT INTO t_sparse_sort_limit SELECT '2020-10-11', number % 10, number FROM numbers(100000); + +SELECT count() FROM (SELECT toStartOfMonth(date) AS d FROM t_sparse_sort_limit ORDER BY -i LIMIT 65536); + +DROP TABLE IF EXISTS t_sparse_sort_limit; diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.reference b/tests/queries/0_stateless/02835_drop_user_during_session.reference new file mode 100644 index 00000000000..7252faab8c6 --- /dev/null +++ b/tests/queries/0_stateless/02835_drop_user_during_session.reference @@ -0,0 +1,8 @@ +port_0_sessions: +0 +address_0_sessions: +0 +Corresponding LoginSuccess/Logout +9 +LoginFailure +0 diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh new file mode 100755 index 00000000000..347ebd22f96 --- /dev/null +++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +# Tags: no-debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ + +readonly TEST_USER="02835_USER_${PID}" +readonly TEST_ROLE="02835_ROLE_${PID}" +readonly TEST_PROFILE="02835_PROFILE_${PID}" +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +function tcp_session() +{ + local user=$1 + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.numbers" --user="${user}" +} + +function http_session() +{ + local user=$1 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers" +} + +function http_with_session_id_session() +{ + local user=$1 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers" +} + +# Busy-waits until the specified number of queries ($2) for user $1 are running simultaneously. 
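+# If the expected count is not reached within ~10 seconds (100 polls of system.processes with a 0.1 second sleep),
+# the wait gives up and the test proceeds anyway.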
+function wait_for_queries_start() +{ + local user=$1 + local queries_count=$2 + # 10 seconds waiting + counter=0 retries=100 + while [[ $counter -lt $retries ]]; do + result=$($CLICKHOUSE_CLIENT --query "SELECT COUNT(*) FROM system.processes WHERE user = '${user}'") + if [[ $result == "${queries_count}" ]]; then + break; + fi + sleep 0.1 + ((++counter)) + done +} + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'" + +# DROP USE CASE +${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +export -f tcp_session; +export -f http_session; +export -f http_with_session_id_session; + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +# DROP ROLE CASE +${CLICKHOUSE_CLIENT} -q "CREATE ROLE IF NOT EXISTS ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} DEFAULT ROLE ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP ROLE ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +# DROP PROFILE CASE +${CLICKHOUSE_CLIENT} -q "CREATE SETTINGS PROFILE IF NOT EXISTS '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} SETTINGS PROFILE '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP SETTINGS PROFILE '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "port_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_port = 0" +echo "address_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_address = toIPv6('::')" +echo "Corresponding LoginSuccess/Logout" +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS}, FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout')" +echo "LoginFailure" +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginFailure'" diff --git a/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.reference b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.reference new 
file mode 100644 index 00000000000..5fda23e0114 --- /dev/null +++ b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.reference @@ -0,0 +1,3 @@ +\N + +\N diff --git a/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.sql b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.sql new file mode 100644 index 00000000000..bdbc5594189 --- /dev/null +++ b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS numbers500k; +CREATE TABLE numbers500k (`number` UInt32) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO numbers500k SELECT number FROM system.numbers LIMIT 500000; +SELECT intDiv(number, NULL) AS k FROM (SELECT * FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) PREWHERE 31 WHERE 65537 > 0 ORDER BY number DESC NULLS FIRST) GROUP BY GROUPING SETS ((k)) WITH TOTALS ORDER BY k ASC NULLS LAST LIMIT 2147483648; +DROP TABLE IF EXISTS numbers500k; diff --git a/tests/queries/0_stateless/02835_join_step_explain.reference b/tests/queries/0_stateless/02835_join_step_explain.reference new file mode 100644 index 00000000000..0cc2e802682 --- /dev/null +++ b/tests/queries/0_stateless/02835_join_step_explain.reference @@ -0,0 +1,116 @@ +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: id UInt64 + value_1 String + rhs.id UInt64 + rhs.value_1 String +Actions: INPUT : 0 -> id_0 UInt64 : 0 + INPUT : 1 -> value_1_1 String : 1 + INPUT : 2 -> value_1_3 String : 2 + INPUT : 3 -> id_2 UInt64 : 3 + ALIAS id_0 :: 0 -> id UInt64 : 4 + ALIAS value_1_1 :: 1 -> value_1 String : 0 + ALIAS value_1_3 :: 2 -> rhs.value_1 String : 1 + ALIAS id_2 :: 3 -> rhs.id UInt64 : 2 +Positions: 4 0 2 1 + Join (JOIN FillRightFirst) + Header: id_0 UInt64 + value_1_1 String + value_1_3 String + id_2 UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(id_0) = (id_2)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_0 UInt64 + value_1_1 String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + ALIAS id :: 0 -> id_0 UInt64 : 2 + ALIAS value_1 :: 1 -> value_1_1 String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value_1 String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_2 UInt64 + value_1_3 String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + ALIAS id :: 0 -> id_2 UInt64 : 2 + ALIAS value_1 :: 1 -> value_1_3 String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value_1 String + ReadType: Default + Parts: 1 + Granules: 1 +-- +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: id UInt64 + value_1 String + rhs.id UInt64 + rhs.value_1 String +Actions: INPUT : 0 -> id_0 UInt64 : 0 + INPUT : 1 -> value_1_1 String : 1 + INPUT :: 2 -> value_2_4 UInt64 : 2 + INPUT : 3 -> value_1_3 String : 3 + INPUT :: 4 -> value_2_5 UInt64 : 4 + INPUT : 5 -> id_2 UInt64 : 5 + ALIAS id_0 :: 0 -> id UInt64 : 6 + ALIAS value_1_1 :: 1 -> value_1 String : 0 + ALIAS value_1_3 :: 3 -> rhs.value_1 String : 1 + ALIAS id_2 :: 5 -> rhs.id UInt64 : 3 +Positions: 6 0 3 1 + Join (JOIN FillRightFirst) + Header: id_0 UInt64 + value_1_1 String + value_2_4 UInt64 + value_1_3 String + value_2_5 UInt64 + id_2 UInt64 + Type: INNER + Strictness: ASOF + Algorithm: HashJoin + ASOF inequality: LESS + Clauses: [(id_0, value_2_4) = (id_2, value_2_5)] + 
Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_0 UInt64 + value_1_1 String + value_2_4 UInt64 + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + INPUT : 2 -> value_2 UInt64 : 2 + ALIAS id :: 0 -> id_0 UInt64 : 3 + ALIAS value_1 :: 1 -> value_1_1 String : 0 + ALIAS value_2 :: 2 -> value_2_4 UInt64 : 1 + Positions: 3 0 1 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value_1 String + value_2 UInt64 + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_2 UInt64 + value_1_3 String + value_2_5 UInt64 + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + INPUT : 2 -> value_2 UInt64 : 2 + ALIAS id :: 0 -> id_2 UInt64 : 3 + ALIAS value_1 :: 1 -> value_1_3 String : 0 + ALIAS value_2 :: 2 -> value_2_5 UInt64 : 1 + Positions: 3 0 1 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value_1 String + value_2 UInt64 + ReadType: Default + Parts: 1 + Granules: 1 diff --git a/tests/queries/0_stateless/02835_join_step_explain.sql b/tests/queries/0_stateless/02835_join_step_explain.sql new file mode 100644 index 00000000000..d0475fa14b6 --- /dev/null +++ b/tests/queries/0_stateless/02835_join_step_explain.sql @@ -0,0 +1,31 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_1; +CREATE TABLE test_table_1 +( + id UInt64, + value_1 String, + value_2 UInt64 +) ENGINE=MergeTree ORDER BY id; + +DROP TABLE IF EXISTS test_table_2; +CREATE TABLE test_table_2 +( + id UInt64, + value_1 String, + value_2 UInt64 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table_1 VALUES (0, 'Value', 0); +INSERT INTO test_table_2 VALUES (0, 'Value', 0); + +EXPLAIN header = 1, actions = 1 SELECT lhs.id, lhs.value_1, rhs.id, rhs.value_1 +FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 SELECT lhs.id, lhs.value_1, rhs.id, rhs.value_1 +FROM test_table_1 AS lhs ASOF JOIN test_table_2 AS rhs ON lhs.id = rhs.id AND lhs.value_2 < rhs.value_2; + +DROP TABLE test_table_1; +DROP TABLE test_table_2; diff --git a/tests/queries/0_stateless/02835_nested_array_lowcardinality.reference b/tests/queries/0_stateless/02835_nested_array_lowcardinality.reference new file mode 100644 index 00000000000..c2936da0b4f --- /dev/null +++ b/tests/queries/0_stateless/02835_nested_array_lowcardinality.reference @@ -0,0 +1,30 @@ +[] [] +['0'] [''] +['0','1'] ['',''] +['0','1','2'] ['','',''] +['0','1','2','3'] ['','','',''] +['0','1','2','3','4'] ['','','','',''] +['0','1','2','3','4','5'] ['','','','','',''] +['0','1','2','3','4','5','6'] ['','','','','','',''] +['0','1','2','3','4','5','6','7'] ['','','','','','','',''] +['0','1','2','3','4','5','6','7','8'] ['','','','','','','','',''] +[] [] +[[]] [[]] +[[],['0']] [[],[]] +[[],['0'],['0','1']] [[],[],[]] +[[],['0'],['0','1'],['0','1','2']] [[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[]] [[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0']] [[],[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1']] [[],[],[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2']] [[],[],[],[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2'],[]] [[],[],[],[],[],[],[],[],[]] +[] [] +[{}] [{}] +[{},{'k0':0}] [{},{}] +[{},{'k0':0},{'k0':0,'k1':1}] [{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2}] [{},{},{},{}] 
+[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{}] [{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0}] [{},{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0},{'k0':0,'k1':1}] [{},{},{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2}] [{},{},{},{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{}] [{},{},{},{},{},{},{},{},{}] diff --git a/tests/queries/0_stateless/02835_nested_array_lowcardinality.sql b/tests/queries/0_stateless/02835_nested_array_lowcardinality.sql new file mode 100644 index 00000000000..36c1eb39cfd --- /dev/null +++ b/tests/queries/0_stateless/02835_nested_array_lowcardinality.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS cool_table; + +CREATE TABLE IF NOT EXISTS cool_table +( + id UInt64, + n Nested(n UInt64, lc1 LowCardinality(String)) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO cool_table SELECT number, range(number), range(number) FROM numbers(10); + +ALTER TABLE cool_table ADD COLUMN IF NOT EXISTS `n.lc2` Array(LowCardinality(String)); + +SELECT n.lc1, n.lc2 FROM cool_table ORDER BY id; + +DROP TABLE IF EXISTS cool_table; + +CREATE TABLE IF NOT EXISTS cool_table +( + id UInt64, + n Nested(n UInt64, lc1 Array(LowCardinality(String))) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO cool_table SELECT number, range(number), arrayMap(x -> range(x % 4), range(number)) FROM numbers(10); + +ALTER TABLE cool_table ADD COLUMN IF NOT EXISTS `n.lc2` Array(Array(LowCardinality(String))); + +SELECT n.lc1, n.lc2 FROM cool_table ORDER BY id; + +DROP TABLE IF EXISTS cool_table; + +CREATE TABLE IF NOT EXISTS cool_table +( + id UInt64, + n Nested(n UInt64, lc1 Map(LowCardinality(String), UInt64)) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO cool_table SELECT number, range(number), arrayMap(x -> (arrayMap(y -> 'k' || toString(y), range(x % 4)), range(x % 4))::Map(LowCardinality(String), UInt64), range(number)) FROM numbers(10); + +ALTER TABLE cool_table ADD COLUMN IF NOT EXISTS `n.lc2` Array(Map(LowCardinality(String), UInt64)); + +SELECT n.lc1, n.lc2 FROM cool_table ORDER BY id; + +DROP TABLE IF EXISTS cool_table; diff --git a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.reference b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.reference new file mode 100644 index 00000000000..e6a24987c0d --- /dev/null +++ b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.reference @@ -0,0 +1,6 @@ +-- 1 shard, 3 replicas +100 0 99 49.5 +200 0 99 49.5 +-- 2 shards, 3 replicas each +200 0 99 49.5 +400 0 99 49.5 diff --git a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql new file mode 100644 index 00000000000..60aa5748575 --- /dev/null +++ b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql @@ -0,0 +1,47 @@ +-- 1 shard + +SELECT '-- 1 shard, 3 replicas'; +DROP TABLE IF EXISTS test_d; +DROP TABLE IF EXISTS test; +CREATE TABLE test (id UInt64, date Date) +ENGINE = MergeTree +ORDER BY id; + +CREATE TABLE IF NOT EXISTS test_d as test +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test); + +insert into test select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, 
max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; + +insert into test select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; + +-- 2 shards + +SELECT '-- 2 shards, 3 replicas each'; +DROP TABLE IF EXISTS test2_d; +DROP TABLE IF EXISTS test2; +CREATE TABLE test2 (id UInt64, date Date) +ENGINE = MergeTree +ORDER BY id; + +CREATE TABLE IF NOT EXISTS test2_d as test2 +ENGINE = Distributed(test_cluster_two_shard_three_replicas_localhost, currentDatabase(), test2, id); + +insert into test2 select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test2_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; + +insert into test2 select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test2_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; diff --git a/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.reference b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.reference new file mode 100644 index 00000000000..6829f972684 --- /dev/null +++ b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.reference @@ -0,0 +1,2 @@ +in file/uri +test.csv diff --git a/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh new file mode 100755 index 00000000000..d1b5ffa2af8 --- /dev/null +++ b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +FILENAME="${CLICKHOUSE_TMP}/test.csv" + +printf 'Bad\rHeader\n123\n' > "${FILENAME}" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/t*e*s*t.csv')" 2>&1 | grep -o -P 'in file/uri|test\.csv' +rm "${FILENAME}" diff --git a/tests/queries/0_stateless/02840_merge__table_or_filter.reference b/tests/queries/0_stateless/02840_merge__table_or_filter.reference new file mode 100644 index 00000000000..ff5e0865a22 --- /dev/null +++ b/tests/queries/0_stateless/02840_merge__table_or_filter.reference @@ -0,0 +1,38 @@ +-- { echoOn } + +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=0, convert_query_to_cnf=0; +v1 1 +v1 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=0, convert_query_to_cnf=0; +v1 1 +v2 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=0; +v1 1 +select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=0; +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=0, convert_query_to_cnf=1; +v1 1 +v1 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=0, convert_query_to_cnf=1; +v1 1 +v2 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=1; +v1 1 +select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=1; +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=1, convert_query_to_cnf=0; +v1 1 +v1 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=1, convert_query_to_cnf=0; +v1 1 +v2 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=0; +v1 1 +select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=0; +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=1, convert_query_to_cnf=1; +v1 1 +v1 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=1, convert_query_to_cnf=1; +v1 1 +v2 2 +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=1; +v1 1 +select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=1; diff --git a/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 b/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 new file mode 100644 index 00000000000..a87ef7302c6 --- /dev/null +++ 
b/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 @@ -0,0 +1,34 @@ +drop table if exists m; +drop view if exists v1; +drop view if exists v2; +drop table if exists d1; +drop table if exists d2; + +create table d1 (key Int, value Int) engine=Memory(); +create table d2 (key Int, value Int) engine=Memory(); + +insert into d1 values (1, 10); +insert into d1 values (2, 20); + +insert into d2 values (1, 10); +insert into d2 values (2, 20); + +create view v1 as select * from d1; +create view v2 as select * from d2; + +create table m as v1 engine=Merge(currentDatabase(), '^(v1|v2)$'); + +-- avoid reorder +set max_threads=1; +-- { echoOn } +{% for settings in [ + 'allow_experimental_analyzer=0, convert_query_to_cnf=0', + 'allow_experimental_analyzer=0, convert_query_to_cnf=1', + 'allow_experimental_analyzer=1, convert_query_to_cnf=0', + 'allow_experimental_analyzer=1, convert_query_to_cnf=1' +] %} +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings {{ settings }}; +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings {{ settings }}; +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings {{ settings }}; +select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings {{ settings }}; +{% endfor %} diff --git a/tests/queries/0_stateless/02841_join_filter_set_sparse.reference b/tests/queries/0_stateless/02841_join_filter_set_sparse.reference new file mode 100644 index 00000000000..1777e2e42f7 --- /dev/null +++ b/tests/queries/0_stateless/02841_join_filter_set_sparse.reference @@ -0,0 +1,2 @@ +3428033 +3428033 diff --git a/tests/queries/0_stateless/02841_join_filter_set_sparse.sql b/tests/queries/0_stateless/02841_join_filter_set_sparse.sql new file mode 100644 index 00000000000..e1a33998d4f --- /dev/null +++ b/tests/queries/0_stateless/02841_join_filter_set_sparse.sql @@ -0,0 +1,22 @@ + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (s String) ENGINE = MergeTree ORDER BY s +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO t1 SELECT if (number % 13 = 0, toString(number), '') FROM numbers(2000); + +CREATE TABLE t2 (s String) ENGINE = MergeTree ORDER BY s +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO t2 SELECT if (number % 14 = 0, toString(number), '') FROM numbers(2000); + +SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s; + +SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 100_000; + +SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/02841_local_assert.reference b/tests/queries/0_stateless/02841_local_assert.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02841_local_assert.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02841_local_assert.sh b/tests/queries/0_stateless/02841_local_assert.sh new file mode 100755 index 00000000000..a167c09da1f --- /dev/null +++ b/tests/queries/0_stateless/02841_local_assert.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "create table test (x UInt64) engine=Memory; +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -nm + +echo "create table test (x UInt64) engine=Memory; +insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -nm --ignore-error + +echo "create table test (x UInt64) engine=Memory; +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS} +select 1" | $CLICKHOUSE_LOCAL -nm + diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.reference b/tests/queries/0_stateless/02841_not_ready_set_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh new file mode 100755 index 00000000000..fd7f62d28bf --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists t1;" +$CLICKHOUSE_CLIENT -q "create table t1 (number UInt64) engine = MergeTree order by tuple();" +$CLICKHOUSE_CLIENT -q "insert into t1 select number from numbers(10);" +$CLICKHOUSE_CLIENT --max_threads=2 --max_result_rows=1 --result_overflow_mode=break -q "with tab as (select min(number) from t1 prewhere number in (select number from view(select number, row_number() OVER (partition by number % 2 ORDER BY number DESC) from numbers_mt(1e4)) where number != 2 order by number)) select number from t1 union all select * from tab;" > /dev/null + diff --git a/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.reference b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.reference new file mode 100644 index 00000000000..749fce669df --- /dev/null +++ b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.reference @@ -0,0 +1 @@ +1000000 diff --git a/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.sql b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.sql new file mode 100644 index 00000000000..db15abb28cb --- /dev/null +++ b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.sql @@ -0,0 +1,9 @@ +-- Tags: no-random-merge-tree-settings +-- Because we insert one million rows, it shouldn't choose too low index granularity. 
+ +drop table if exists tab2; +create table tab2 (id String, version Int64, l String, accountCode String, z Int32) engine = ReplacingMergeTree(z) PRIMARY KEY (accountCode, id) ORDER BY (accountCode, id, version, l); +insert into tab2 select toString(number), number, toString(number), toString(number), 0 from numbers(1e6); +set max_threads=2; +select count() from tab2 final; +DROP TABLE tab2; diff --git a/tests/queries/0_stateless/02841_parallel_replicas_summary.reference b/tests/queries/0_stateless/02841_parallel_replicas_summary.reference new file mode 100644 index 00000000000..9b7fd74dbaa --- /dev/null +++ b/tests/queries/0_stateless/02841_parallel_replicas_summary.reference @@ -0,0 +1,4 @@ +1 +1 +02841_summary_default_interactive_0 1 +02841_summary_default_interactive_high 1 diff --git a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh new file mode 100755 index 00000000000..792c45b06d6 --- /dev/null +++ b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function involved_parallel_replicas () { + # Not using current_database = '$CLICKHOUSE_DATABASE' as nested parallel queries aren't run with it + $CLICKHOUSE_CLIENT --query " + SELECT + initial_query_id, + countIf(initial_query_id != query_id) != 0 as parallel_replicas_were_used + FROM system.query_log + WHERE event_date >= yesterday() + AND initial_query_id LIKE '$1%' + GROUP BY initial_query_id + ORDER BY min(event_time_microseconds) ASC + FORMAT TSV" +} + +$CLICKHOUSE_CLIENT --query "CREATE TABLE replicas_summary (n Int64) ENGINE = MergeTree() ORDER BY n AS Select * from numbers(100_000)" + +# Note that we are not verifying the exact read rows and bytes (apart from not being 0) for 2 reasons: +# - Different block sizes lead to different read rows +# - Depending on how fast the replicas are they might need data that ends up being discarded because the coordinator +# already has enough (but it has been read in parallel, so it's reported). 
+ +query_id_base="02841_summary_$CLICKHOUSE_DATABASE" + +echo " + SELECT * + FROM replicas_summary + LIMIT 100 + SETTINGS + max_parallel_replicas = 2, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + allow_experimental_parallel_reading_from_replicas = 2, + parallel_replicas_for_non_replicated_merge_tree = 1, + use_hedged_requests = 0, + interactive_delay=0 + "\ + | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_0" --data-binary @- -vvv 2>&1 \ + | grep "Summary" | grep -cv '"read_rows":"0"' + +echo " + SELECT * + FROM replicas_summary + LIMIT 100 + SETTINGS + max_parallel_replicas = 2, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + allow_experimental_parallel_reading_from_replicas = 2, + parallel_replicas_for_non_replicated_merge_tree = 1, + use_hedged_requests = 0, + interactive_delay=99999999999 + "\ + | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_high" --data-binary @- -vvv 2>&1 \ + | grep "Summary" | grep -cv '"read_rows":"0"' + +$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" +involved_parallel_replicas "${query_id_base}" diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference new file mode 100644 index 00000000000..4adf418bcc7 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference @@ -0,0 +1,73 @@ +number Nullable(UInt64) +u8 Nullable(UInt8) +u16 Nullable(UInt16) +u32 Nullable(UInt32) +u64 Nullable(UInt64) +i8 Nullable(Int8) +i16 Nullable(Int16) +i32 Nullable(Int32) +i64 Nullable(Int64) +date32 Nullable(Date32) +dt64_ms Nullable(DateTime64(3, \'UTC\')) +dt64_us Nullable(DateTime64(6, \'UTC\')) +dt64_ns Nullable(DateTime64(9, \'UTC\')) +dt64_s Nullable(DateTime64(3, \'UTC\')) +dt64_cs Nullable(DateTime64(3, \'UTC\')) +f32 Nullable(Float32) +f64 Nullable(Float64) +s Nullable(String) +fs Nullable(FixedString(9)) +d32 Nullable(Decimal(9, 3)) +d64 Nullable(Decimal(18, 10)) +d128 Nullable(Decimal(38, 20)) +d256 Nullable(Decimal(76, 40)) +800 3959600 +1000 4999500 +1800 6479100 +500 2474750 +300 1604850 +500 2474750 +300 1604850 +500 2474750 +2100 5563950 +300 1184850 +400 1599800 +300 1184850 +500 2524750 +500 2524750 +300 1514850 +300 1514850 +300 1594850 +300 1594850 +200 999900 +200 999900 +200 999900 +200 999900 +0 \N +400 1709800 +0 \N +10000 49995000 +0 \N +200 909900 +10000 49995000 +0 \N +2 +500 244750 +500 244750 +300 104850 +300 104850 +200 179900 +200 179900 +200 179900 +200 179900 +200 19900 +200 19900 +600 259700 +600 259700 +500 244750 +500 244750 +0 \N +500 244750 +500 244750 +500 244750 +500 244750 diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql new file mode 100644 index 00000000000..8521ada04d5 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql @@ -0,0 +1,137 @@ +-- Tags: no-fasttest, no-parallel + +set output_format_parquet_row_group_size = 100; + +set input_format_null_as_default = 1; +set engine_file_truncate_on_insert = 1; +set optimize_or_like_chain = 0; +set max_block_size = 100000; +set max_insert_threads = 1; + +-- Analyzer breaks the queries with IN and some queries with BETWEEN. +-- TODO: Figure out why. +set allow_experimental_analyzer=0; + +-- Try all the types. 
+insert into function file('02841.parquet') + -- Use negative numbers to test sign extension for signed types and lack of sign extension for + -- unsigned types. + with 5000 - number as n select + + number, + + intDiv(n, 11)::UInt8 as u8, + n::UInt16 u16, + n::UInt32 as u32, + n::UInt64 as u64, + intDiv(n, 11)::Int8 as i8, + n::Int16 i16, + n::Int32 as i32, + n::Int64 as i64, + + toDate32(n*500000) as date32, + toDateTime64(n*1e6, 3) as dt64_ms, + toDateTime64(n*1e6, 6) as dt64_us, + toDateTime64(n*1e6, 9) as dt64_ns, + toDateTime64(n*1e6, 0) as dt64_s, + toDateTime64(n*1e6, 2) as dt64_cs, + + (n/1000)::Float32 as f32, + (n/1000)::Float64 as f64, + + n::String as s, + n::String::FixedString(9) as fs, + + n::Decimal32(3)/1234 as d32, + n::Decimal64(10)/12345678 as d64, + n::Decimal128(20)/123456789012345 as d128, + n::Decimal256(40)/123456789012345/678901234567890 as d256 + + from numbers(10000); + +desc file('02841.parquet'); + +-- To generate reference results, use a temporary table and GROUP BYs to simulate row group filtering: +-- create temporary table t as with [as above] select intDiv(number, 100) as group, [as above]; +-- then e.g. for a query that filters by `x BETWEEN a AND b`: +-- select sum(c), sum(h) from (select count() as c, sum(number) as h, min(x) as mn, max(x) as mx from t group by group) where a <= mx and b >= mn; + +-- Go over all types individually. +select count(), sum(number) from file('02841.parquet') where indexHint(u8 in (10, 15, 250)); +select count(), sum(number) from file('02841.parquet') where indexHint(i8 between -3 and 2); +select count(), sum(number) from file('02841.parquet') where indexHint(u16 between 4000 and 61000 or u16 == 42); +select count(), sum(number) from file('02841.parquet') where indexHint(i16 between -150 and 250); +select count(), sum(number) from file('02841.parquet') where indexHint(u32 in (42, 4294966296)); +select count(), sum(number) from file('02841.parquet') where indexHint(i32 between -150 and 250); +select count(), sum(number) from file('02841.parquet') where indexHint(u64 in (42, 18446744073709550616)); +select count(), sum(number) from file('02841.parquet') where indexHint(i64 between -150 and 250); +select count(), sum(number) from file('02841.parquet') where indexHint(date32 between '1992-01-01' and '2023-08-02'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_ms between '2000-01-01' and '2005-01-01'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_us between toDateTime64(900000000, 2) and '2005-01-01'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_ns between '2000-01-01' and '2005-01-01'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_s between toDateTime64('-2.01e8'::Decimal64(0), 0) and toDateTime64(1.5e8::Decimal64(0), 0)); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_cs between toDateTime64('-2.01e8'::Decimal64(1), 1) and toDateTime64(1.5e8::Decimal64(2), 2)); +select count(), sum(number) from file('02841.parquet') where indexHint(f32 between -0.11::Float32 and 0.06::Float32); +select count(), sum(number) from file('02841.parquet') where indexHint(f64 between -0.11 and 0.06); +select count(), sum(number) from file('02841.parquet') where indexHint(s between '-9' and '1!!!'); +select count(), sum(number) from file('02841.parquet') where indexHint(fs between '-9' and '1!!!'); +select count(), sum(number) from file('02841.parquet') where indexHint(d32 between 
'-0.011'::Decimal32(3) and 0.006::Decimal32(3)); +select count(), sum(number) from file('02841.parquet') where indexHint(d64 between '-0.0000011'::Decimal64(7) and 0.0000006::Decimal64(9)); +select count(), sum(number) from file('02841.parquet') where indexHint(d128 between '-0.00000000000011'::Decimal128(20) and 0.00000000000006::Decimal128(20)); +select count(), sum(number) from file('02841.parquet') where indexHint(d256 between '-0.00000000000000000000000000011'::Decimal256(40) and 0.00000000000000000000000000006::Decimal256(35)); + +-- Some random other cases. +select count(), sum(number) from file('02841.parquet') where indexHint(0); +select count(), sum(number) from file('02841.parquet') where indexHint(s like '99%' or u64 == 2000); +select count(), sum(number) from file('02841.parquet') where indexHint(s like 'z%'); +select count(), sum(number) from file('02841.parquet') where indexHint(u8 == 10 or 1 == 1); +select count(), sum(number) from file('02841.parquet') where indexHint(u8 < 0); +select count(), sum(number) from file('02841.parquet') where indexHint(u64 + 1000000 == 1001000); +select count(), sum(number) from file('02841.parquet') where indexHint(u64 + 1000000 == 1001000) settings input_format_parquet_filter_push_down = 0; +select count(), sum(number) from file('02841.parquet') where indexHint(u32 + 1000000 == 999000); + +-- Very long string, which makes the Parquet encoder omit the corresponding min/max stat. +insert into function file('02841.parquet') + select arrayStringConcat(range(number*1000000)) as s from numbers(2); +select count() from file('02841.parquet') where indexHint(s > ''); + +-- Nullable and LowCardinality. +insert into function file('02841.parquet') select + number, + if(number%234 == 0, NULL, number) as sometimes_null, + toNullable(number) as never_null, + if(number%345 == 0, number::String, NULL) as mostly_null, + toLowCardinality(if(number%234 == 0, NULL, number)) as sometimes_null_lc, + toLowCardinality(toNullable(number)) as never_null_lc, + toLowCardinality(if(number%345 == 0, number::String, NULL)) as mostly_null_lc + from numbers(1000); + +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null is NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null_lc is NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(mostly_null is not NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(mostly_null_lc is not NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null_lc > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null_lc > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null < 150); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null_lc < 150); +-- Quirk with infinities: this reads too much because KeyCondition represents NULLs as infinities. +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null < 150); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null_lc < 150); + +-- Settings that affect the table schema or contents. 
+insert into function file('02841.parquet') select + number, + if(number%234 == 0, NULL, number + 100) as positive_or_null, + if(number%234 == 0, NULL, -number - 100) as negative_or_null, + if(number%234 == 0, NULL, 'I am a string') as string_or_null + from numbers(1000); + +select count(), sum(number) from file('02841.parquet') where indexHint(positive_or_null < 50); -- quirk with infinities +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, positive_or_null UInt64') where indexHint(positive_or_null < 50); +select count(), sum(number) from file('02841.parquet') where indexHint(negative_or_null > -50); +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null > -50); +select count(), sum(number) from file('02841.parquet') where indexHint(string_or_null == ''); -- quirk with infinities +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, string_or_null String') where indexHint(string_or_null == ''); +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where indexHint(nEgAtIvE_oR_nUlL > -50) settings input_format_parquet_case_insensitive_column_matching = 1; diff --git a/tests/queries/0_stateless/02841_remote_parameter_parsing_error.reference b/tests/queries/0_stateless/02841_remote_parameter_parsing_error.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02841_remote_parameter_parsing_error.sql b/tests/queries/0_stateless/02841_remote_parameter_parsing_error.sql new file mode 100644 index 00000000000..9e467a1f69a --- /dev/null +++ b/tests/queries/0_stateless/02841_remote_parameter_parsing_error.sql @@ -0,0 +1,14 @@ +-- Tags: shard + + +select * from remote('127.0.0.1', sys); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select * from remote('127.0.0.1', system); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select * from remote('127.0.0.1', system.o); -- { serverError UNKNOWN_TABLE } +select * from remote('127.0.0.1', system.one, default); -- { serverError UNKNOWN_IDENTIFIER } +select * from remote('127.0.0.1', system.one, default, ''); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, default, key1); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, 'default', '', key1); -- { serverError UNKNOWN_IDENTIFIER } +select * from remote('127.0.0.1', system.one, default, '', key1); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, 'default', pwd, key1); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, 'default', '', key1, key2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select * from remote('127.0.0.1', system, one, 'default', '', key1, key2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02841_tuple_modulo.reference b/tests/queries/0_stateless/02841_tuple_modulo.reference new file mode 100644 index 00000000000..6e6f07d0683 --- /dev/null +++ b/tests/queries/0_stateless/02841_tuple_modulo.reference @@ -0,0 +1,4 @@ +(1,0) +(2,2) +(2,2) +(0,0) diff --git a/tests/queries/0_stateless/02841_tuple_modulo.sql b/tests/queries/0_stateless/02841_tuple_modulo.sql new file mode 100644 index 00000000000..56bacf87967 --- /dev/null +++ b/tests/queries/0_stateless/02841_tuple_modulo.sql @@ -0,0 +1,4 @@ +SELECT (5,4) % 2; +SELECT intDiv((5,4), 2); +SELECT intDivOrZero((5,4), 2); +SELECT 
intDivOrZero((5,4), 0); diff --git a/tests/queries/0_stateless/02841_with_clause_resolve.reference b/tests/queries/0_stateless/02841_with_clause_resolve.reference new file mode 100644 index 00000000000..e2dfc4d85a9 --- /dev/null +++ b/tests/queries/0_stateless/02841_with_clause_resolve.reference @@ -0,0 +1,15 @@ +2.5 +2.5 +2.5 +2.5 +2.5 +(9399,2858) +(7159,6972) +(7456,3524) +(12685,10191) +(12598,4979) +(9824,2699) +(5655,7793) +(14410,10296) +(16211,7662) +(9349,9053) diff --git a/tests/queries/0_stateless/02841_with_clause_resolve.sql b/tests/queries/0_stateless/02841_with_clause_resolve.sql new file mode 100644 index 00000000000..b416446461b --- /dev/null +++ b/tests/queries/0_stateless/02841_with_clause_resolve.sql @@ -0,0 +1,141 @@ +set allow_experimental_analyzer = 1; + +WITH + -- Input + 44100 AS sample_frequency + , number AS tick + , tick / sample_frequency AS time + + -- Delay + , (time, wave, delay_, decay, count) -> arraySum(n1 -> wave(time - delay_ * n1), range(count)) AS delay + + , delay(time, (time -> 0.5), 0.2, 0.5, 5) AS kick + +SELECT + + kick + +FROM system.numbers +LIMIT 5; + +WITH + -- Input + 44100 AS sample_frequency + , number AS tick + , tick / sample_frequency AS time + + -- Output control + , 1 AS master_volume + , level -> least(1.0, greatest(-1.0, level)) AS clamp + , level -> (clamp(level) * 0x7FFF * master_volume)::Int16 AS output + , x -> (x, x) AS mono + + -- Basic waves + , time -> sin(time * 2 * pi()) AS sine_wave + , time -> time::UInt64 % 2 * 2 - 1 AS square_wave + , time -> (time - floor(time)) * 2 - 1 AS sawtooth_wave + , time -> abs(sawtooth_wave(time)) * 2 - 1 AS triangle_wave + + -- Helpers + , (from, to, wave, time) -> from + ((wave(time) + 1) / 2) * (to - from) AS lfo + , (from, to, steps, time) -> from + floor((time - floor(time)) * steps) / steps * (to - from) AS step_lfo + , (from, to, steps, time) -> exp(step_lfo(log(from), log(to), steps, time)) AS exp_step_lfo + + -- Noise + , time -> cityHash64(time) / 0xFFFFFFFFFFFFFFFF AS uniform_noise + , time -> erf(uniform_noise(time)) AS white_noise + , time -> cityHash64(time) % 2 ? 1 : -1 AS bernoulli_noise + + -- Distortion + , (x, amount) -> clamp(x * amount) AS clipping + , (x, amount) -> clamp(x > 0 ? pow(x, amount) : -pow(-x, amount)) AS power_distortion + , (x, amount) -> round(x * exp2(amount)) / exp2(amount) AS bitcrush + , (time, sample_frequency) -> round(time * sample_frequency) / sample_frequency AS desample + , (time, wave, amount) -> (time - floor(time) < (1 - amount)) ? wave(time * (1 - amount)) : 0 AS thin + , (time, wave, amount) -> wave(floor(time) + pow(time - floor(time), amount)) AS skew + + -- Combining + , (a, b, weight) -> a * (1 - weight) + b * weight AS combine + + -- Envelopes + , (time, offset, attack, hold, release) -> + time < offset ? 0 + : (time < offset + attack ? ((time - offset) / attack) + : (time < offset + attack + hold ? 1 + : (time < offset + attack + hold + release ? 
(offset + attack + hold + release - time) / release + : 0))) AS envelope + + , (bpm, time, offset, attack, hold, release) -> + envelope( + time * (bpm / 60) - floor(time * (bpm / 60)), + offset, + attack, + hold, + release) AS running_envelope + + -- Sequencers + , (sequence, time) -> sequence[1 + time::UInt64 % length(sequence)] AS sequencer + + -- Delay + , (time, wave, delay, decay, count) -> arraySum(n -> wave(time - delay * n) * pow(decay, n), range(count)) AS delay + + + , delay(time, (time -> power_distortion(sine_wave(time * 80 + sine_wave(time * 2)), lfo(0.5, 1, sine_wave, time / 16)) + * running_envelope(60, time, 0, 0.0, 0.01, 0.1)), + 0.2, 0.5, 5) AS kick + +SELECT + + (output( + kick + + delay(time, (time -> + power_distortion( + sine_wave(time * 50 + 1 * sine_wave(time * 100 + 1/4)) + * running_envelope(60, time, 0, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, time / 7) + + + delay(time, (time -> + power_distortion( + sine_wave(time * sequencer([50, 100, 200, 400], time / 2) + 1 * sine_wave(time * sequencer([50, 100, 200], time / 4) + 1/4)) + * running_envelope(60, time, 0.5, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 16 + time / 11) + + + delay(time, (time -> + white_noise(time) * running_envelope(60, time, 0.75, 0.01, 0.01, 0.1)), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 24 + time / 13) + + + sine_wave(time * 100 + 1 * sine_wave(time * 10 + 1/4)) + * running_envelope(120, time, 0, 0.01, 0.01, 0.1) + ), + + output( + kick + + delay(time + 0.01, (time -> + power_distortion( + sine_wave(time * 50 + 1 * sine_wave(time * 100 + 1/4)) + * running_envelope(60, time, 0, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, time / 7) + + + delay(time - 0.01, (time -> + power_distortion( + sine_wave(time * sequencer([50, 100, 200, 400], time / 2) + 1 * sine_wave(time * sequencer([50, 100, 200], time / 4) + 1/4)) + * running_envelope(60, time, 0.5, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 16 + time / 11) + + + delay(time + 0.005, (time -> + white_noise(time) * running_envelope(60, time, 0.75, 0.01, 0.01, 0.1)), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 24 + time / 13) + )) + +FROM system.numbers +LIMIT 10; diff --git a/tests/queries/0_stateless/02842_filesystem_cache_validate_path.reference b/tests/queries/0_stateless/02842_filesystem_cache_validate_path.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02842_filesystem_cache_validate_path.sql b/tests/queries/0_stateless/02842_filesystem_cache_validate_path.sql new file mode 100644 index 00000000000..c8fb776ac5f --- /dev/null +++ b/tests/queries/0_stateless/02842_filesystem_cache_validate_path.sql @@ -0,0 +1,45 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test_1; +DROP TABLE IF EXISTS test_2; + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '/kek', + disk = 'local_disk'); -- {serverError BAD_ARGUMENTS} + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '/var/lib/clickhouse/filesystem_caches/../kek', + disk = 'local_disk'); -- {serverError BAD_ARGUMENTS} + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() 
+SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '../kek', + disk = 'local_disk'); -- {serverError BAD_ARGUMENTS} + +CREATE TABLE test_1 (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '/var/lib/clickhouse/filesystem_caches/kek', + disk = 'local_disk'); + +CREATE TABLE test_2 (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = 'kek', + disk = 'local_disk'); diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference new file mode 100644 index 00000000000..6e04d969e67 --- /dev/null +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference @@ -0,0 +1,20 @@ +SELECT count() +FROM t_02848_mt1 +PREWHERE notEmpty(v) AND (k = 3) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (b = \'3\') AND (a = 3) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (c > 0) AND (a = 3) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (c < 20) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (a = 3) +1 diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql new file mode 100644 index 00000000000..bc9d7e5664e --- /dev/null +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql @@ -0,0 +1,34 @@ +SET optimize_move_to_prewhere = 1; +SET enable_multiple_prewhere_read_steps = 1; + +DROP TABLE IF EXISTS t_02848_mt1; +DROP TABLE IF EXISTS t_02848_mt2; + +CREATE TABLE t_02848_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0; + +INSERT INTO t_02848_mt1 SELECT number, toString(number) FROM numbers(100); + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); + +CREATE TABLE t_02848_mt2 (a UInt32, b String, c Int32, d String) ENGINE = MergeTree ORDER BY (a,b,c) SETTINGS min_bytes_for_wide_part=0; + +INSERT INTO t_02848_mt2 SELECT number, toString(number), number, 'aaaabbbbccccddddtestxxxyyy' FROM numbers(100); + +-- the estimated column sizes are: {a: 428, b: 318, c: 428, d: 73} +-- it's not correct but let's fix it in the future. 
+ +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; + +DROP TABLE t_02848_mt1; +DROP TABLE t_02848_mt2; diff --git a/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.reference b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.reference new file mode 100644 index 00000000000..729d6cc94e6 --- /dev/null +++ b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.reference @@ -0,0 +1,10 @@ +10 4950 +UPDATE v = _CAST(4950, \'Nullable(UInt64)\') WHERE 1 +10 [0,1,2,3,4,5,6,7,8,9] +UPDATE v = _CAST([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], \'Array(UInt64)\') WHERE 1 +10 5 +UPDATE v = _CAST(\'\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\', \'AggregateFunction(uniqExact, UInt64)\') WHERE 1 +10 1 +UPDATE v = _CAST(timestamp, \'DateTime\') WHERE 1 +UPDATE v = (SELECT sum(number) FROM numbers(1000) WHERE number > randConstant()) WHERE 1 +20 2100-10-10 00:00:00 diff --git a/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.sql b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.sql new file mode 100644 index 00000000000..b0e1b5c54fb --- /dev/null +++ b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.sql @@ -0,0 +1,134 @@ +DROP TABLE IF EXISTS t_mutations_nondeterministic SYNC; + +SET mutations_sync = 2; +SET mutations_execute_subqueries_on_initiator = 1; +SET mutations_execute_nondeterministic_on_initiator = 1; + +-- SELECT sum(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, 20); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT sum(number) FROM numbers(100)) WHERE 1; + +SELECT id, v FROM t_mutations_nondeterministic ORDER BY id; + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- SELECT groupArray(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v Array(UInt64)) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, [20]); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT groupArray(number) FROM numbers(10)) WHERE 1; + +SELECT id, v FROM t_mutations_nondeterministic ORDER BY id; + +-- Too big result. 
+ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT groupArray(number) FROM numbers(10000)) WHERE 1; -- { serverError BAD_ARGUMENTS } + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- SELECT uniqExactState(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v AggregateFunction(uniqExact, UInt64)) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, initializeAggregation('uniqExactState', 1::UInt64)); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT uniqExactState(number) FROM numbers(5)) WHERE 1; + +SELECT id, finalizeAggregation(v) FROM t_mutations_nondeterministic ORDER BY id; + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- now() + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v DateTime) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, '2020-10-10'); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = now() WHERE 1; + +SELECT id, v BETWEEN now() - INTERVAL 10 MINUTE AND now() FROM t_mutations_nondeterministic; + +SELECT + replaceRegexpOne(command, '(\\d{10})', 'timestamp'), +FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- filesystem(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, 10); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = filesystemCapacity(materialize('default')) WHERE 1; -- { serverError BAD_ARGUMENTS } + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- UPDATE SELECT randConstant() + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, 10); + +-- Check that function in subquery is not rewritten. 
+ALTER TABLE t_mutations_nondeterministic +UPDATE v = +( + SELECT sum(number) FROM numbers(1000) WHERE number > randConstant() +) WHERE 1 +SETTINGS mutations_execute_subqueries_on_initiator = 0, allow_nondeterministic_mutations = 1; + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- DELETE WHERE now() + +CREATE TABLE t_mutations_nondeterministic (id UInt64, d DateTime) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, '2000-10-10'), (20, '2100-10-10'); + +ALTER TABLE t_mutations_nondeterministic DELETE WHERE d < now(); + +SELECT + replaceRegexpOne(command, '(\\d{10})', 'timestamp'), +FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND NOT is_done +ORDER BY command; + +SELECT id, d FROM t_mutations_nondeterministic ORDER BY id; + +DROP TABLE t_mutations_nondeterministic SYNC; diff --git a/tests/queries/0_stateless/02842_one_input_format.reference b/tests/queries/0_stateless/02842_one_input_format.reference new file mode 100644 index 00000000000..714df685535 --- /dev/null +++ b/tests/queries/0_stateless/02842_one_input_format.reference @@ -0,0 +1,12 @@ +dummy UInt8 +0 +0 +0 +data.csv +data.jsonl +data.native +0 +0 +0 +1 +1 diff --git a/tests/queries/0_stateless/02842_one_input_format.sh b/tests/queries/0_stateless/02842_one_input_format.sh new file mode 100755 index 00000000000..f2199cbe2ce --- /dev/null +++ b/tests/queries/0_stateless/02842_one_input_format.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +FILE_DIR=$CLICKHOUSE_TEST_UNIQUE_NAME +mkdir -p $FILE_DIR + +$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format Native" > $FILE_DIR/data.native +$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format CSV" > $FILE_DIR/data.csv +$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format JSONEachRow" > $FILE_DIR/data.jsonl + +$CLICKHOUSE_LOCAL -q "desc file('$FILE_DIR/*', One)" +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One)" +$CLICKHOUSE_LOCAL -q "select _file from file('$FILE_DIR/*', One) order by _file" +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt8')" +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt64')" 2>&1 | grep "BAD_ARGUMENTS" -c +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt8, y UInt8')" 2>&1 | grep "BAD_ARGUMENTS" -c + +rm -rf $FILE_DIR + diff --git a/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference new file mode 100644 index 00000000000..0025187be30 --- /dev/null +++ b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference @@ -0,0 +1,4 @@ +There is no handle /sashboards. Maybe you meant /dashboard +There is no handle /sashboard. Maybe you meant /dashboard +There is no handle /sashboarb. Maybe you meant /dashboard +There is no handle /sashboaxb. 
Maybe you meant /dashboard diff --git a/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh new file mode 100755 index 00000000000..cf69c742777 --- /dev/null +++ b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboards" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboard" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboarb" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboaxb" | grep -o ".* Maybe you meant /dashboard" diff --git a/tests/queries/0_stateless/02842_table_function_file_filter_by_virtual_columns.reference b/tests/queries/0_stateless/02842_table_function_file_filter_by_virtual_columns.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02842_table_function_file_filter_by_virtual_columns.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02842_table_function_file_filter_by_virtual_columns.sh b/tests/queries/0_stateless/02842_table_function_file_filter_by_virtual_columns.sh new file mode 100755 index 00000000000..c69de80b1c1 --- /dev/null +++ b/tests/queries/0_stateless/02842_table_function_file_filter_by_virtual_columns.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "1" > $CLICKHOUSE_TEST_UNIQUE_NAME.data1.tsv +echo "2" > $CLICKHOUSE_TEST_UNIQUE_NAME.data2.tsv +echo "3" > $CLICKHOUSE_TEST_UNIQUE_NAME.data3.tsv + +$CLICKHOUSE_LOCAL --print-profile-events -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.data{1,2,3}.tsv', auto, 'x UInt64') where _file like '%data1%' format Null" 2>&1 | grep -F -c "EngineFileLikeReadFiles: 1" + +$CLICKHOUSE_LOCAL --print-profile-events -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.data{1,2,3}.tsv', auto, 'x UInt64') where _path like '%data1%' format Null" 2>&1 | grep -F -c "EngineFileLikeReadFiles: 1" + +rm $CLICKHOUSE_TEST_UNIQUE_NAME.data* + diff --git a/tests/queries/0_stateless/02842_truncate_database.reference b/tests/queries/0_stateless/02842_truncate_database.reference new file mode 100644 index 00000000000..71f52bcd1da --- /dev/null +++ b/tests/queries/0_stateless/02842_truncate_database.reference @@ -0,0 +1,22 @@ +0 +0 +0 +0 +0 +dest_dictionary test_truncate_database 0 +1 First +=== TABLES IN test_truncate_database === +dest_dictionary +dest_view_log +dest_view_memory +dest_view_merge_tree +dest_view_stripe_log +dest_view_tiny_log +source_table_dictionary +source_table_log +source_table_memory +source_table_merge_tree +source_table_stripe_log +source_table_tiny_log +=== DICTIONARIES IN test_truncate_database === +dest_dictionary diff --git a/tests/queries/0_stateless/02842_truncate_database.sql b/tests/queries/0_stateless/02842_truncate_database.sql new file mode 100644 index 00000000000..a767acba14c --- /dev/null +++ b/tests/queries/0_stateless/02842_truncate_database.sql @@ -0,0 +1,76 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS test_truncate_database; + +-- test TRUNCATE DATABASE operation. +-- create tables, views and dictionary and populate them. Then try truncating the database. 
+-- all tables, views and dictionaries should be removed leaving an empty database +CREATE DATABASE test_truncate_database; +USE test_truncate_database; + +-- create tables with several different types of table engines +CREATE TABLE source_table_memory (x UInt16) ENGINE = Memory; +CREATE TABLE source_table_log (x UInt16) ENGINE = Log; +CREATE TABLE source_table_tiny_log (x UInt16) ENGINE = TinyLog; +CREATE TABLE source_table_stripe_log (x UInt16) ENGINE = StripeLog; +CREATE TABLE source_table_merge_tree (x UInt16) ENGINE = MergeTree ORDER BY x PARTITION BY x; +-- create dictionary source table +CREATE TABLE source_table_dictionary +( + id UInt64, + value String +) ENGINE = Memory(); + +-- insert data into the tables +INSERT INTO source_table_memory SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_log SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_tiny_log SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_stripe_log SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_merge_tree SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_dictionary VALUES (1, 'First'); + + +-- create view based on the tables +CREATE VIEW dest_view_memory (x UInt64) AS SELECT * FROM source_table_memory; +CREATE VIEW dest_view_log (x UInt64) AS SELECT * FROM source_table_log; +CREATE VIEW dest_view_tiny_log (x UInt64) AS SELECT * FROM source_table_tiny_log; +CREATE VIEW dest_view_stripe_log (x UInt64) AS SELECT * FROM source_table_stripe_log; +CREATE VIEW dest_view_merge_tree (x UInt64) AS SELECT * FROM source_table_merge_tree; +-- create dictionary based on source table +CREATE DICTIONARY dest_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'test_truncate_database' TABLE 'source_table_dictionary')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + + +SELECT * FROM dest_view_memory ORDER BY x LIMIT 1; +SELECT * FROM dest_view_log ORDER BY x LIMIT 1; +SELECT * FROM dest_view_tiny_log ORDER BY x LIMIT 1; +SELECT * FROM dest_view_stripe_log ORDER BY x LIMIT 1; +SELECT * FROM dest_view_merge_tree ORDER BY x LIMIT 1; +SELECT name, database, element_count FROM system.dictionaries WHERE database = 'test_truncate_database' AND name = 'dest_dictionary'; +SELECT * FROM dest_dictionary; +SELECT '=== TABLES IN test_truncate_database ==='; +SHOW TABLES FROM test_truncate_database; +SELECT '=== DICTIONARIES IN test_truncate_database ==='; +SHOW DICTIONARIES FROM test_truncate_database; + +TRUNCATE DATABASE test_truncate_database; + +SELECT * FROM dest_view_set ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_memory ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_log ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_tiny_log ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_stripe_log ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_merge_tree ORDER BY x LIMIT 1; -- {serverError 60} +SELECT name, database, element_count FROM system.dictionaries WHERE database = 'test_truncate_database' AND name = 'dest_dictionary'; +SELECT * FROM dest_dictionary; -- {serverError 60} +SHOW TABLES FROM test_truncate_database; +SHOW DICTIONARIES FROM test_truncate_database; + +DROP DATABASE test_truncate_database; diff --git a/tests/queries/0_stateless/02842_vertical_merge_after_add_drop_column.reference b/tests/queries/0_stateless/02842_vertical_merge_after_add_drop_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/queries/0_stateless/02842_vertical_merge_after_add_drop_column.sql b/tests/queries/0_stateless/02842_vertical_merge_after_add_drop_column.sql new file mode 100644 index 00000000000..0a06eb05431 --- /dev/null +++ b/tests/queries/0_stateless/02842_vertical_merge_after_add_drop_column.sql @@ -0,0 +1,25 @@ +-- In some versions, vertical merges after DROP COLUMN were broken in some cases + +drop table if exists data; + +create table data ( + key Int, + `legacy_features_Map.id` Array(UInt8), + `legacy_features_Map.count` Array(UInt32), +) engine=MergeTree() +order by key +settings + min_bytes_for_wide_part=0, + min_rows_for_wide_part=0, + vertical_merge_algorithm_min_rows_to_activate=0, + vertical_merge_algorithm_min_columns_to_activate=0; + +insert into data (key) values (1); +insert into data (key) values (2); + +alter table data add column `features_legacy_Map.id` Array(UInt8), add column `features_legacy_Map.count` Array(UInt32); + +alter table data drop column legacy_features_Map settings mutations_sync=2; + +optimize table data final; +DROP TABLE data; diff --git a/tests/queries/0_stateless/02843_backup_use_same_s3_credentials_for_base_backup.reference b/tests/queries/0_stateless/02843_backup_use_same_s3_credentials_for_base_backup.reference new file mode 100644 index 00000000000..2862c459ae1 --- /dev/null +++ b/tests/queries/0_stateless/02843_backup_use_same_s3_credentials_for_base_backup.reference @@ -0,0 +1,14 @@ +use_same_s3_credentials_for_base_backup for S3 +BACKUP_CREATED +BACKUP_CREATED +The request signature we calculated does not match the signature you provided. Check your key and signing method. (S3_ERROR) +BACKUP_CREATED +The request signature we calculated does not match the signature you provided. Check your key and signing method. (S3_ERROR) +RESTORED +RESTORED +use_same_s3_credentials_for_base_backup for S3 (invalid arguments) +BACKUP_CREATED +NUMBER_OF_ARGUMENTS_DOESNT_MATCH +use_same_s3_credentials_for_base_backup for Disk +BACKUP_CREATED +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/02843_backup_use_same_s3_credentials_for_base_backup.sh b/tests/queries/0_stateless/02843_backup_use_same_s3_credentials_for_base_backup.sh new file mode 100755 index 00000000000..939179baa26 --- /dev/null +++ b/tests/queries/0_stateless/02843_backup_use_same_s3_credentials_for_base_backup.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag: no-fasttest - requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key Int) engine=MergeTree() order by tuple(); + insert into data select * from numbers(10); +" + +function write_invalid_password_to_base_backup() +{ + local name=$1 && shift + local content + content=$($CLICKHOUSE_CLIENT -q "select replace(line, 'testtest', 'INVALID_PASSWORD') from s3($(s3_location $name/.backup), 'LineAsString')") || return 1 + $CLICKHOUSE_CLIENT --param_content="$content" -q "insert into function s3($(s3_location $name/.backup), 'LineAsString') settings s3_truncate_on_insert=1 values ({content:String})" +} + +# Returns the arguments for the BACKUP TO S3() function, i.e.
(url, access_key_id, secret_access_key) +function s3_location() { echo "'http://localhost:11111/test/backups/$CLICKHOUSE_DATABASE/use_same_s3_credentials_for_base_backup_base_$*', 'test', 'testtest'"; } + +echo 'use_same_s3_credentials_for_base_backup for S3' +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO S3($(s3_location base))" | cut -f2 +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO S3($(s3_location inc_1)) SETTINGS base_backup=S3($(s3_location base))" | cut -f2 +write_invalid_password_to_base_backup inc_1 +$CLICKHOUSE_CLIENT --format Null -q "BACKUP TABLE data TO S3($(s3_location inc_2)) SETTINGS base_backup=S3($(s3_location inc_1))" |& grep -m1 -o 'The request signature we calculated does not match the signature you provided. Check your key and signing method. (S3_ERROR)' +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO S3($(s3_location inc_3)) SETTINGS base_backup=S3($(s3_location inc_1)), use_same_s3_credentials_for_base_backup=1" | cut -f2 + +$CLICKHOUSE_CLIENT --format Null -q "RESTORE TABLE data AS data FROM S3($(s3_location inc_1))" |& grep -m1 -o 'The request signature we calculated does not match the signature you provided. Check your key and signing method. (S3_ERROR)' +$CLICKHOUSE_CLIENT -q "RESTORE TABLE data AS data_1 FROM S3($(s3_location inc_1)) SETTINGS use_same_s3_credentials_for_base_backup=1" | cut -f2 +$CLICKHOUSE_CLIENT -q "RESTORE TABLE data AS data_2 FROM S3($(s3_location inc_3)) SETTINGS use_same_s3_credentials_for_base_backup=1" | cut -f2 + +echo 'use_same_s3_credentials_for_base_backup for S3 (invalid arguments)' +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO S3($(s3_location inc_4_bad)) SETTINGS base_backup=S3($(s3_location inc_1), 'foo'), use_same_s3_credentials_for_base_backup=1" |& cut -f2 +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO S3($(s3_location inc_5_bad), 'foo') SETTINGS base_backup=S3($(s3_location inc_1)), use_same_s3_credentials_for_base_backup=1" |& grep -o -m1 NUMBER_OF_ARGUMENTS_DOESNT_MATCH + +echo 'use_same_s3_credentials_for_base_backup for Disk' +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO Disk('backups', '$CLICKHOUSE_DATABASE/backup_1') SETTINGS use_same_s3_credentials_for_base_backup=1" | cut -f2 +$CLICKHOUSE_CLIENT -q "BACKUP TABLE data TO Disk('backups', '$CLICKHOUSE_DATABASE/backup_2') SETTINGS use_same_s3_credentials_for_base_backup=1, base_backup=Disk('backups', '$CLICKHOUSE_DATABASE/backup_1')" |& grep -o -m1 BAD_ARGUMENTS + +exit 0 diff --git a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference b/tests/queries/0_stateless/02843_context_has_expired.reference similarity index 80% rename from tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference rename to tests/queries/0_stateless/02843_context_has_expired.reference index 44e0be8e356..229972f2924 100644 --- a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference +++ b/tests/queries/0_stateless/02843_context_has_expired.reference @@ -2,3 +2,4 @@ 0 0 0 +0 diff --git a/tests/queries/0_stateless/02843_context_has_expired.sql b/tests/queries/0_stateless/02843_context_has_expired.sql new file mode 100644 index 00000000000..8355ce2c18c --- /dev/null +++ b/tests/queries/0_stateless/02843_context_has_expired.sql @@ -0,0 +1,36 @@ +DROP DICTIONARY IF EXISTS 02843_dict; +DROP TABLE IF EXISTS 02843_source; +DROP TABLE IF EXISTS 02843_join; + +CREATE TABLE 02843_source +( + id UInt64, + value String +) +ENGINE=Memory; + +CREATE 
DICTIONARY 02843_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE '02843_source')) +LAYOUT(DIRECT()); + +SELECT 1 IN (SELECT dictGet('02843_dict', 'value', materialize('1'))); + +CREATE TABLE 02843_join (id UInt8, value String) ENGINE Join(ANY, LEFT, id); +SELECT 1 IN (SELECT joinGet(02843_join, 'value', materialize(1))); +SELECT 1 IN (SELECT joinGetOrNull(02843_join, 'value', materialize(1))); + +SELECT 1 IN (SELECT materialize(connectionId())); +SELECT 1000000 IN (SELECT materialize(getSetting('max_threads'))); +SELECT 1 in (SELECT file(materialize('a'))); -- { serverError 107 } + +EXPLAIN ESTIMATE SELECT 1 IN (SELECT dictGet('02843_dict', 'value', materialize('1'))); +EXPLAIN ESTIMATE SELECT 1 IN (SELECT joinGet(`02843_join`, 'value', materialize(1))); + +DROP DICTIONARY 02843_dict; +DROP TABLE 02843_source; +DROP TABLE 02843_join; diff --git a/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.reference b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.reference new file mode 100644 index 00000000000..d641328e9a5 --- /dev/null +++ b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.reference @@ -0,0 +1,2 @@ +202308 1 +202308 2 diff --git a/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.sql b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.sql new file mode 100644 index 00000000000..6e26a5166cb --- /dev/null +++ b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.sql @@ -0,0 +1,9 @@ +select + toYYYYMM(date) as date_, + n +from (select + [toDate('20230815'), toDate('20230816')] as date, + [1, 2] as n +) as data +array join date, n +where date_ >= 202303; diff --git a/tests/queries/0_stateless/02843_insertion_table_schema_infer.reference b/tests/queries/0_stateless/02843_insertion_table_schema_infer.reference new file mode 100644 index 00000000000..aec86406a98 --- /dev/null +++ b/tests/queries/0_stateless/02843_insertion_table_schema_infer.reference @@ -0,0 +1,3 @@ +user127 1 +user405 1 +user902 1 diff --git a/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh b/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh new file mode 100755 index 00000000000..d806b678456 --- /dev/null +++ b/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_tsv + +$CLICKHOUSE_LOCAL --multiquery \ +"CREATE VIEW users AS SELECT * FROM file('$DATA_DIR/mock_data.tsv', TSVWithNamesAndTypes); + CREATE TABLE users_output (name String, tag UInt64)ENGINE = Memory; + INSERT INTO users_output WITH (SELECT groupUniqArrayArray(mapKeys(Tags)) FROM users) AS unique_tags SELECT UserName AS name, length(unique_tags) AS tag FROM users; + SELECT * FROM users_output;" diff --git a/tests/queries/0_stateless/02844_distributed_virtual_columns.reference b/tests/queries/0_stateless/02844_distributed_virtual_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02844_distributed_virtual_columns.sql b/tests/queries/0_stateless/02844_distributed_virtual_columns.sql new file mode 100644 index 00000000000..31a6780f19e --- /dev/null +++ b/tests/queries/0_stateless/02844_distributed_virtual_columns.sql @@ -0,0 +1,5 @@ +drop table if exists data_01072; +drop table if exists dist_01072; +create table data_01072 (key Int) Engine=MergeTree() ORDER BY key; +create table dist_01072 (key Int) Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01072, key); +select * from dist_01072 where key=0 and _part='0'; diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.reference b/tests/queries/0_stateless/02844_subquery_timeout_with_break.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql new file mode 100644 index 00000000000..511ed0c59de --- /dev/null +++ b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (key UInt64, value UInt64, INDEX value_idx value TYPE bloom_filter GRANULARITY 1) ENGINE=MergeTree() ORDER BY key; + +INSERT INTO t SELECT number, rand()%1000 FROM numbers(10000); + +SET timeout_overflow_mode='break'; +SET max_execution_time=0.1; +SELECT * FROM t WHERE value IN (SELECT number FROM numbers(1000000000)); + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02844_table_function_url_filter_by_virtual_columns.reference b/tests/queries/0_stateless/02844_table_function_url_filter_by_virtual_columns.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02844_table_function_url_filter_by_virtual_columns.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02844_table_function_url_filter_by_virtual_columns.sh b/tests/queries/0_stateless/02844_table_function_url_filter_by_virtual_columns.sh new file mode 100755 index 00000000000..8207273bfc3 --- /dev/null +++ b/tests/queries/0_stateless/02844_table_function_url_filter_by_virtual_columns.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --print-profile-events -q "select * from url('http://localhost:11111/test/{a,b,c}.tsv', auto, 'x UInt64, y UInt64, z UInt64') where _file = 'a.tsv' format Null" 2>&1 | grep -F -c "EngineFileLikeReadFiles: 1" + +$CLICKHOUSE_LOCAL --print-profile-events -q "select * from url('http://localhost:11111/test/{a,b,c}.tsv', auto, 'x UInt64, y UInt64, z UInt64') where _path = '/test/a.tsv' format Null" 2>&1 | grep -F -c "EngineFileLikeReadFiles: 1" + diff --git a/tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.reference b/tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.reference new file mode 100644 index 00000000000..749fce669df --- /dev/null +++ b/tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.reference @@ -0,0 +1 @@ +1000000 diff --git a/tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.sh b/tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.sh new file mode 100755 index 00000000000..cfd419d908e --- /dev/null +++ b/tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 'Hello\rWorld' from numbers(1000000) format TSVRaw" > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv +$CLICKHOUSE_LOCAL -q "select count() from file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv')" +rm $CLICKHOUSE_TEST_UNIQUE_NAME.tsv + diff --git a/tests/queries/0_stateless/02845_arrayShiftRotate.reference b/tests/queries/0_stateless/02845_arrayShiftRotate.reference new file mode 100644 index 00000000000..9a8670f387c --- /dev/null +++ b/tests/queries/0_stateless/02845_arrayShiftRotate.reference @@ -0,0 +1,115 @@ +== arrayRotateLeft +[3,4,5,1,2] +[4,5,1,2,3] +[4,5,1,2,3] +['l','l','o','H','e'] +[[[5,6],[7,8]],[[1,2],[3,4]]] + +== arrayRotateRight +[4,5,1,2,3] +[3,4,5,1,2] +[3,4,5,1,2] +['l','o','H','e','l'] +[[[5,6],[7,8]],[[1,2],[3,4]]] + +== arrayShiftLeft +[4,5,0,0,0] +[0,0,0,1,2] +[0,0,0,0,0] +['d','e','','',''] +[[5,6],[],[]] +[[[5,6],[7,8]],[]] +[4,5,7,7,7] +['d','e','foo','foo','foo'] +[[5,6],[7,8],[7,8]] +[2,3,4,5,6,1000] + +== arrayShiftRight +[0,0,0,1,2] +[4,5,0,0,0] +[0,0,0,0,0] +['','','','a','b'] +[[],[],[1,2]] +[[],[[1,2],[3,4]]] +[7,7,7,1,2] +['foo','foo','foo','a','b'] +[[7,8],[7,8],[1,2]] +[1000,1,2,3,4,5] + +== table +== table with constants +-- arrayRotateLeft +[3,4,5,6,1,2] +[3,4,5,6,1,2] +[3,4,1,2] +[15,16,23,42,4,8] +[18,28,18,28,45,90,45,2,7] +[159,26,5,3,14] +-- arrayRotateRight +[5,6,1,2,3,4] +[5,6,1,2,3,4] +[3,4,1,2] +[23,42,4,8,15,16] +[90,45,2,7,18,28,18,28,45] +[26,5,3,14,159] +-- arrayShiftLeft +[4,5,6,0,0,0] +[4,5,6,0,0,0] +[4,0,0,0] +[16,23,42,0,0,0] +[28,18,28,45,90,45,0,0,0] +[26,5,0,0,0] +-- arrayShiftRight +[0,0,0,1,2,3] +[0,0,0,1,2,3] +[0,0,0,1] +[0,0,0,4,8,15] +[0,0,0,2,7,18,28,18,28] +[0,0,0,3,14] +== table with constants and defaults +-- arrayShiftLeft +[4,5,6,7,7,7] +[4,5,6,7,7,7] +[4,7,7,7] +[16,23,42,7,7,7] +[28,18,28,45,90,45,7,7,7] +[26,5,7,7,7] +-- arrayShiftRight +[7,7,7,1,2,3] +[7,7,7,1,2,3] +[7,7,7,1] +[7,7,7,4,8,15] +[7,7,7,2,7,18,28,18,28] +[7,7,7,3,14] +== table values +-- arrayRotateLeft +[3,4,5,6,1,2] +[4,5,6,1,2,3] +[4,1,2,3] +[42,4,8,15,16,23] +[90,45,2,7,18,28,18,28,45] +[14,159,26,5,3] +-- arrayRotateRight +[5,6,1,2,3,4] +[4,5,6,1,2,3] +[2,3,4,1] +[8,15,16,23,42,4] +[18,28,18,28,45,90,45,2,7] +[5,3,14,159,26] +-- arrayShiftLeft 
+[3,4,5,6,1,1] +[4,5,6,2,2,2] +[4,3,3,3] +[42,4,4,4,4,4] +[90,45,5,5,5,5,5,5,5] +[6,6,6,6,6] +-- arrayShiftRight +[1,1,1,2,3,4] +[2,2,2,1,2,3] +[3,3,3,1] +[4,4,4,4,4,4] +[5,5,5,5,5,5,5,2,7] +[6,6,6,6,6] +== problematic cast cases +[5] +[[]] diff --git a/tests/queries/0_stateless/02845_arrayShiftRotate.sql b/tests/queries/0_stateless/02845_arrayShiftRotate.sql new file mode 100644 index 00000000000..bdb409c3fe5 --- /dev/null +++ b/tests/queries/0_stateless/02845_arrayShiftRotate.sql @@ -0,0 +1,78 @@ +select '== arrayRotateLeft'; +select arrayRotateLeft([1,2,3,4,5], 2); +select arrayRotateLeft([1,2,3,4,5], -2); +select arrayRotateLeft([1,2,3,4,5], 8); +select arrayRotateLeft(['H', 'e', 'l', 'l', 'o'], 2); +select arrayRotateLeft([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1); +select ''; + +select '== arrayRotateRight'; +select arrayRotateRight([1,2,3,4,5], 2); +select arrayRotateRight([1,2,3,4,5], -2); +select arrayRotateRight([1,2,3,4,5], 8); +select arrayRotateRight(['H', 'e', 'l', 'l', 'o'], 2); +select arrayRotateRight([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1); +select ''; + +select '== arrayShiftLeft'; +select arrayShiftLeft([1, 2, 3, 4, 5], 3); +select arrayShiftLeft([1, 2, 3, 4, 5], -3); +select arrayShiftLeft([1, 2, 3, 4, 5], 8); +select arrayShiftLeft(['a', 'b', 'c', 'd', 'e'], 3); +select arrayShiftLeft([[1, 2], [3, 4], [5, 6]], 2); +select arrayShiftLeft([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1); +select arrayShiftLeft([1, 2, 3, 4, 5], 3, 7); +select arrayShiftLeft(['a', 'b', 'c', 'd', 'e'], 3, 'foo'); +select arrayShiftLeft([[1, 2], [3, 4], [5, 6]], 2, [7, 8]); +select arrayShiftLeft(CAST('[1, 2, 3, 4, 5, 6]', 'Array(UInt16)'), 1, 1000); +select ''; + +select '== arrayShiftRight'; +select arrayShiftRight([1, 2, 3, 4, 5], 3); +select arrayShiftRight([1, 2, 3, 4, 5], -3); +select arrayShiftRight([1, 2, 3, 4, 5], 8); +select arrayShiftRight(['a', 'b', 'c', 'd', 'e'], 3); +select arrayShiftRight([[1, 2], [3, 4], [5, 6]], 2); +select arrayShiftRight([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1); +select arrayShiftRight([1, 2, 3, 4, 5], 3, 7); +select arrayShiftRight(['a', 'b', 'c', 'd', 'e'], 3, 'foo'); +select arrayShiftRight([[1, 2], [3, 4], [5, 6]], 2, [7, 8]); +select arrayShiftRight(CAST('[1, 2, 3, 4, 5, 6]', 'Array(UInt16)'), 1, 1000); +select ''; + +select '== table'; +drop table if exists t02845; +create table t02845 (a Array(UInt8), s Int16, d UInt8) engine = MergeTree order by d; +insert into t02845 values ([1,2,3,4,5,6], 2, 1),([1,2,3,4,5,6], 3, 2),([1,2,3,4], 3, 3),([4,8,15,16,23,42], 5, 4),([2, 7, 18, 28, 18, 28, 45, 90, 45], 7, 5),([3, 14, 159, 26, 5], 11, 6); + +select '== table with constants'; +select '-- arrayRotateLeft'; +select arrayRotateLeft(a, 2) from t02845; +select '-- arrayRotateRight'; +select arrayRotateRight(a, 2) from t02845; +select '-- arrayShiftLeft'; +select arrayShiftLeft(a, 3) from t02845; +select '-- arrayShiftRight'; +select arrayShiftRight(a, 3) from t02845; + +select '== table with constants and defaults'; +select '-- arrayShiftLeft'; +select arrayShiftLeft(a, 3, 7) from t02845; +select '-- arrayShiftRight'; +select arrayShiftRight(a, 3, 7) from t02845; + +select '== table values'; +select '-- arrayRotateLeft'; +select arrayRotateLeft(a, s) from t02845; +select '-- arrayRotateRight'; +select arrayRotateRight(a, s) from t02845; +select '-- arrayShiftLeft'; +select arrayShiftLeft(a, s, d) from t02845; +select '-- arrayShiftRight'; +select arrayShiftRight(a, s, d) from t02845; + +select '== problematic cast cases'; +select arrayShiftLeft([30000], 3, 5); 
+select arrayShiftLeft([[1]], 3, []); +select arrayShiftLeft(['foo'], 3, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select arrayShiftLeft([1], 3, 'foo'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.reference b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.reference new file mode 100644 index 00000000000..26ba1a3422b --- /dev/null +++ b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.reference @@ -0,0 +1,32 @@ +"2001:db8::1" +"2001:db8::1" +"::200" +"2001:db8::1" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" diff --git a/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.sql b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.sql new file mode 100644 index 00000000000..e590064af44 --- /dev/null +++ b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.sql @@ -0,0 +1,33 @@ +SELECT domainRFC('http://[2001:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[2001:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[::200]:80') FORMAT CSV; +SELECT domainRFC('[2001:db8::1]') FORMAT CSV; +-- Does not conform to the IPv6 format. +SELECT domainRFC('[2001db81]:80') FORMAT CSV; +SELECT domainRFC('[20[01:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[20[01:db]8::1]:80') FORMAT CSV; +SELECT domainRFC('[2001:db8::1') FORMAT CSV; +SELECT domainRFC('2001:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[2001db81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db.81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db/81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db?81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db#81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db@81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db;81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db=81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db&81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db~81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db%81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db<81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db>81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db{81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db}81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db|81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db\81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db^81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db 81]:80') FORMAT CSV; +SELECT domainRFC('[[]:80') FORMAT CSV; +SELECT domainRFC('[]]:80') FORMAT CSV; +SELECT domainRFC('[]:80') FORMAT CSV; +SELECT domainRFC('[ ]:80') FORMAT CSV; diff --git a/tests/queries/0_stateless/02845_join_on_cond_sparse.reference b/tests/queries/0_stateless/02845_join_on_cond_sparse.reference new file mode 100644 index 00000000000..9e4a1331412 --- /dev/null +++ b/tests/queries/0_stateless/02845_join_on_cond_sparse.reference @@ -0,0 +1 @@ +0 1 0 0 diff --git a/tests/queries/0_stateless/02845_join_on_cond_sparse.sql b/tests/queries/0_stateless/02845_join_on_cond_sparse.sql new file mode 100644 index 00000000000..b70419af057 --- /dev/null +++ b/tests/queries/0_stateless/02845_join_on_cond_sparse.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 ( id UInt32, attr UInt32 ) ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1; + +INSERT INTO t1 VALUES (0, 0); + +CREATE TABLE t2 ( id UInt32, attr UInt32 ) ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1; + +INSERT INTO t2 VALUES (0, 0); + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 
t1.attr != 0; + +INSERT INTO t1 VALUES (0, 1); + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND t1.attr != 0; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02845_parquet_odd_decimals.reference b/tests/queries/0_stateless/02845_parquet_odd_decimals.reference new file mode 100644 index 00000000000..29d6383b52c --- /dev/null +++ b/tests/queries/0_stateless/02845_parquet_odd_decimals.reference @@ -0,0 +1 @@ +100 diff --git a/tests/queries/0_stateless/02845_parquet_odd_decimals.sh b/tests/queries/0_stateless/02845_parquet_odd_decimals.sh new file mode 100755 index 00000000000..f1e2ec849c4 --- /dev/null +++ b/tests/queries/0_stateless/02845_parquet_odd_decimals.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# 9-byte decimals produced by spark in integration test test_storage_delta/test.py::test_single_log_file + +${CLICKHOUSE_CLIENT} --query="drop table if exists 02845_parquet_odd_decimals" +${CLICKHOUSE_CLIENT} --query="create table 02845_parquet_odd_decimals (\`col-1de12c05-5dd5-4fa7-9f93-33c43c9a4028\` Decimal(20, 0), \`col-5e1b97f1-dade-4c7d-b71b-e31d789e01a4\` String) engine Memory" +${CLICKHOUSE_CLIENT} --query="insert into 02845_parquet_odd_decimals from infile '$CUR_DIR/data_parquet/nine_byte_decimals_from_spark.parquet'" +${CLICKHOUSE_CLIENT} --query="select count() from 02845_parquet_odd_decimals" diff --git a/tests/queries/0_stateless/02845_table_function_hdfs_filter_by_virtual_columns.reference b/tests/queries/0_stateless/02845_table_function_hdfs_filter_by_virtual_columns.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02845_table_function_hdfs_filter_by_virtual_columns.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02845_table_function_hdfs_filter_by_virtual_columns.sh b/tests/queries/0_stateless/02845_table_function_hdfs_filter_by_virtual_columns.sh new file mode 100755 index 00000000000..50afe5173c5 --- /dev/null +++ b/tests/queries/0_stateless/02845_table_function_hdfs_filter_by_virtual_columns.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, use-hdfs + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into table function hdfs('hdfs://localhost:12222/$CLICKHOUSE_TEST_UNIQUE_NAME.data1.tsv') select 1 settings hdfs_truncate_on_insert=1;" +$CLICKHOUSE_CLIENT -q "insert into table function hdfs('hdfs://localhost:12222/$CLICKHOUSE_TEST_UNIQUE_NAME.data2.tsv') select 2 settings hdfs_truncate_on_insert=1;" +$CLICKHOUSE_CLIENT -q "insert into table function hdfs('hdfs://localhost:12222/$CLICKHOUSE_TEST_UNIQUE_NAME.data3.tsv') select 3 settings hdfs_truncate_on_insert=1;" + + +$CLICKHOUSE_CLIENT --print-profile-events -q "select * from hdfs('hdfs://localhost:12222/$CLICKHOUSE_TEST_UNIQUE_NAME.data*.tsv', auto, 'x UInt64') where _file like '%data1%' format Null" 2>&1 | grep -F -c "EngineFileLikeReadFiles: 1" + +$CLICKHOUSE_CLIENT --print-profile-events -q "select * from hdfs('hdfs://localhost:12222/$CLICKHOUSE_TEST_UNIQUE_NAME.data*.tsv', auto, 'x UInt64') where _path like '%data1%' format Null" 2>&1 | grep -F -c "EngineFileLikeReadFiles: 1" diff --git a/tests/queries/0_stateless/02860_distributed_flush_on_detach.reference b/tests/queries/0_stateless/02860_distributed_flush_on_detach.reference new file mode 100644 index 00000000000..8e5815188e7 --- /dev/null +++ b/tests/queries/0_stateless/02860_distributed_flush_on_detach.reference @@ -0,0 +1,27 @@ +-- { echoOn } + +create table data (key Int) engine=Memory(); +create table dist (key Int) engine=Distributed(default, currentDatabase(), data); +system stop distributed sends dist; +-- check that FLUSH DISTRIBUTED does flushing anyway +insert into dist values (1); +select * from data; +system flush distributed dist; +select * from data; +1 +truncate table data; +-- check that flush_on_detach=1 by default +insert into dist values (1); +detach table dist; +select * from data; +1 +attach table dist; +truncate table data; +-- check flush_on_detach=0 +drop table dist; +create table dist (key Int) engine=Distributed(default, currentDatabase(), data) settings flush_on_detach=0; +system stop distributed sends dist; +insert into dist values (1); +detach table dist; +select * from data; +attach table dist; diff --git a/tests/queries/0_stateless/02860_distributed_flush_on_detach.sql b/tests/queries/0_stateless/02860_distributed_flush_on_detach.sql new file mode 100644 index 00000000000..5ba45d72c08 --- /dev/null +++ b/tests/queries/0_stateless/02860_distributed_flush_on_detach.sql @@ -0,0 +1,33 @@ +set prefer_localhost_replica=0; + +drop table if exists data; +drop table if exists dist; + +-- { echoOn } + +create table data (key Int) engine=Memory(); +create table dist (key Int) engine=Distributed(default, currentDatabase(), data); +system stop distributed sends dist; + +-- check that FLUSH DISTRIBUTED does flushing anyway +insert into dist values (1); +select * from data; +system flush distributed dist; +select * from data; +truncate table data; + +-- check that flush_on_detach=1 by default +insert into dist values (1); +detach table dist; +select * from data; +attach table dist; +truncate table data; + +-- check flush_on_detach=0 +drop table dist; +create table dist (key Int) engine=Distributed(default, currentDatabase(), data) settings flush_on_detach=0; +system stop distributed sends dist; +insert into dist values (1); +detach table dist; +select * from data; +attach table dist; diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference new file mode 100644 index 00000000000..428ba88bff0 --- /dev/null +++ 
b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference @@ -0,0 +1,8 @@ +1 +1 +1 +1 1 +1 +1 +1 +1 1 diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql new file mode 100644 index 00000000000..a5ddf830d48 --- /dev/null +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (key UInt8) ENGINE = Memory; +INSERT INTO t1 VALUES (1),(2); + +SET join_algorithm = 'full_sorting_merge'; + +SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; +SELECT key FROM ( SELECT 1 AS key ) AS t1 JOIN ( SELECT 1 AS key ) AS t2 ON t1.key = t2.key WHERE key; +SELECT * FROM ( SELECT 1 AS key GROUP BY NULL ) AS t1 INNER JOIN (SELECT 1 AS key) AS t2 ON t1.key = t2.key WHERE t1.key ORDER BY key; + +SET max_rows_in_set_to_optimize_join = 0; + +SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; +SELECT key FROM ( SELECT 1 AS key ) AS t1 JOIN ( SELECT 1 AS key ) AS t2 ON t1.key = t2.key WHERE key; +SELECT * FROM ( SELECT 1 AS key GROUP BY NULL ) AS t1 INNER JOIN (SELECT 1 AS key) AS t2 ON t1.key = t2.key WHERE t1.key ORDER BY key; + +DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02861_index_set_incorrect_args.reference b/tests/queries/0_stateless/02861_index_set_incorrect_args.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02861_index_set_incorrect_args.sql b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql new file mode 100644 index 00000000000..fa51f5c9abc --- /dev/null +++ b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/52019 +DROP TABLE IF EXISTS set_index__fuzz_41; +CREATE TABLE set_index__fuzz_41 (`a` Date, `b` Nullable(DateTime64(3)), INDEX b_set b TYPE set(0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO set_index__fuzz_41 (a) VALUES (today()); +SELECT b FROM set_index__fuzz_41 WHERE and(b = 256) SETTINGS force_data_skipping_indices = 'b_set', optimize_move_to_prewhere = 0, max_parallel_replicas=2, parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_parallel_reading_from_replicas=2, use_hedged_requests=0; -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } +DROP TABLE set_index__fuzz_41; diff --git a/tests/queries/0_stateless/02861_interpolate_alias_precedence.reference b/tests/queries/0_stateless/02861_interpolate_alias_precedence.reference new file mode 100644 index 00000000000..0e207969f7c --- /dev/null +++ b/tests/queries/0_stateless/02861_interpolate_alias_precedence.reference @@ -0,0 +1,8 @@ +2023-05-15 1 +2023-05-16 1 +2023-05-17 1 +2023-05-18 1 +2023-05-19 1 +2023-05-20 1 +2023-05-21 1 +2023-05-22 15 diff --git a/tests/queries/0_stateless/02861_interpolate_alias_precedence.sql b/tests/queries/0_stateless/02861_interpolate_alias_precedence.sql new file mode 100644 index 00000000000..dc96b9c57c7 --- /dev/null +++ b/tests/queries/0_stateless/02861_interpolate_alias_precedence.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS 02861_interpolate; + +CREATE TABLE 02861_interpolate (date Date, id String, f Int16) ENGINE=MergeTree() ORDER BY (date); +INSERT INTO 02861_interpolate VALUES ('2023-05-15', '1', 1), ('2023-05-22', '1', 15); + +SELECT date AS d, toNullable(f) AS f FROM 02861_interpolate WHERE id = '1' ORDER BY d ASC WITH FILL 
STEP toIntervalDay(1) INTERPOLATE (f); + +DROP TABLE 02861_interpolate; diff --git a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 new file mode 100644 index 00000000000..d97d6c2b314 --- /dev/null +++ b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 @@ -0,0 +1,671 @@ +{% for join_algorithm in ['default', 'grace_hash', 'full_sorting_merge'] -%} +join_algorithm = {{ join_algorithm }}, join_use_nulls = 0, t1 JOIN t2 +-- +\N 0 2 2 +\N 0 6 6 +\N 0 10 10 +\N 0 14 14 +\N 0 18 18 +1 1 1 1 +\N 2 \N 4 +\N 2 \N 8 +\N 2 \N 12 +\N 2 \N 16 +\N 2 \N 20 +3 3 3 3 +\N 4 \N 4 +\N 4 \N 8 +\N 4 \N 12 +\N 4 \N 16 +\N 4 \N 20 +5 5 5 5 +\N 6 \N 4 +\N 6 \N 8 +\N 6 \N 12 +\N 6 \N 16 +\N 6 \N 20 +7 7 7 7 +\N 8 \N 4 +\N 8 \N 8 +\N 8 \N 12 +\N 8 \N 16 +\N 8 \N 20 +9 9 9 9 +\N 10 \N 4 +\N 10 \N 8 +\N 10 \N 12 +\N 10 \N 16 +\N 10 \N 20 +11 11 11 11 +\N 12 \N 4 +\N 12 \N 8 +\N 12 \N 12 +\N 12 \N 16 +\N 12 \N 20 +13 13 13 13 +\N 14 \N 4 +\N 14 \N 8 +\N 14 \N 12 +\N 14 \N 16 +\N 14 \N 20 +15 15 15 15 +\N 16 \N 4 +\N 16 \N 8 +\N 16 \N 12 +\N 16 \N 16 +\N 16 \N 20 +17 17 17 17 +\N 18 \N 4 +\N 18 \N 8 +\N 18 \N 12 +\N 18 \N 16 +\N 18 \N 20 +19 19 19 19 +\N 20 \N 4 +\N 20 \N 8 +\N 20 \N 12 +\N 20 \N 16 +\N 20 \N 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 \N 3 +\N \N 0 6 \N 6 +\N \N 0 9 \N 9 +\N \N 0 10 10 10 +\N \N 0 14 14 14 +\N \N 0 15 \N 15 +\N \N 0 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N 0 +3 3 3 \N \N 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N 12 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N 0 +\N 10 10 \N \N 0 +11 11 11 11 11 11 +\N \N 12 \N \N 12 +13 13 13 13 13 13 +\N 14 14 \N \N 0 +15 15 15 \N \N 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N 12 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 \N 3 +\N \N 0 \N 4 4 +\N \N 0 6 \N 6 +\N \N 0 \N 8 8 +\N \N 0 9 \N 9 +\N \N 0 10 10 10 +\N \N 0 \N \N 12 +\N \N 0 14 14 14 +\N \N 0 15 \N 15 +\N \N 0 \N 16 16 +\N \N 0 18 \N 18 +\N \N 0 \N 20 20 +1 1 1 1 1 1 +\N 2 2 \N \N 0 +3 3 3 \N \N 0 +\N 4 4 \N \N 0 +5 5 5 5 5 5 +\N \N 6 \N \N 0 +7 7 7 7 7 7 +\N 8 8 \N \N 0 +9 9 9 \N \N 0 +\N 10 10 \N \N 0 +11 11 11 11 11 11 +\N \N 12 \N \N 0 +13 13 13 13 13 13 +\N 14 14 \N \N 0 +15 15 15 \N \N 0 +\N 16 16 \N \N 0 +17 17 17 17 17 17 +\N \N 18 \N \N 0 +19 19 19 19 19 19 +\N 20 20 \N \N 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 \N 3 +\N \N 0 6 \N 6 +\N \N 0 9 \N 9 +\N \N 0 10 10 10 +\N \N 0 \N \N 12 +\N \N 0 14 14 14 +\N \N 0 15 \N 15 +\N \N 0 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N 0 +3 3 3 \N \N 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N 0 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N 0 +\N 10 10 \N \N 0 +11 11 11 11 11 11 +\N \N 12 \N \N 0 +13 13 13 13 13 13 +\N 14 14 \N \N 0 +15 15 15 \N \N 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N 0 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +{% endfor -%} +join_algorithm = default, join_use_nulls = 1, t1 JOIN t2 +-- +\N \N 2 2 +\N \N 6 6 +\N \N 10 10 +\N \N 14 14 +\N \N 18 18 +1 1 1 1 +\N 2 \N 4 +\N 2 \N 8 +\N 2 \N 12 +\N 2 \N 16 +\N 2 \N 20 +3 3 3 3 +\N 4 \N 4 +\N 4 \N 8 +\N 4 \N 12 +\N 4 \N 16 +\N 4 \N 20 +5 5 5 5 +\N 6 \N 4 +\N 6 \N 8 +\N 6 \N 12 +\N 6 \N 16 +\N 6 \N 20 +7 7 7 7 +\N 8 \N 4 +\N 8 \N 8 +\N 8 \N 12 +\N 8 \N 16 +\N 8 \N 20 +9 9 9 9 +\N 10 \N 4 +\N 10 \N 8 +\N 10 \N 12 +\N 10 \N 16 +\N 10 \N 20 +11 11 11 11 +\N 12 \N 4 +\N 12 \N 8 +\N 12 \N 12 +\N 12 \N 16 +\N 12 \N 20 +13 13 13 13 +\N 14 \N 4 +\N 14 \N 8 +\N 14 \N 12 +\N 14 \N 16 +\N 14 \N 20 +15 15 15 15 +\N 16 \N 4 +\N 16 \N 8 +\N 16 \N 12 +\N 16 \N 16 +\N 16 \N 20 
+17 17 17 17 +\N 18 \N 4 +\N 18 \N 8 +\N 18 \N 12 +\N 18 \N 16 +\N 18 \N 20 +19 19 19 19 +\N 20 \N 4 +\N 20 \N 8 +\N 20 \N 12 +\N 20 \N 16 +\N 20 \N 20 +-- +\N \N \N 2 2 2 +\N \N \N 3 \N 3 +\N \N \N 6 \N 6 +\N \N \N 9 \N 9 +\N \N \N 10 10 10 +\N \N \N 14 14 14 +\N \N \N 15 \N 15 +\N \N \N 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N \N +3 3 3 \N \N \N +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N 12 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N \N +\N 10 10 \N \N \N +11 11 11 11 11 11 +\N \N 12 \N \N 12 +13 13 13 13 13 13 +\N 14 14 \N \N \N +15 15 15 \N \N \N +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N 12 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +-- +\N \N \N 2 2 2 +\N \N \N 3 \N 3 +\N \N \N \N 4 4 +\N \N \N 6 \N 6 +\N \N \N \N 8 8 +\N \N \N 9 \N 9 +\N \N \N 10 10 10 +\N \N \N \N \N 12 +\N \N \N 14 14 14 +\N \N \N 15 \N 15 +\N \N \N \N 16 16 +\N \N \N 18 \N 18 +\N \N \N \N 20 20 +1 1 1 1 1 1 +\N 2 2 \N \N \N +3 3 3 \N \N \N +\N 4 4 \N \N \N +5 5 5 5 5 5 +\N \N 6 \N \N \N +7 7 7 7 7 7 +\N 8 8 \N \N \N +9 9 9 \N \N \N +\N 10 10 \N \N \N +11 11 11 11 11 11 +\N \N 12 \N \N \N +13 13 13 13 13 13 +\N 14 14 \N \N \N +15 15 15 \N \N \N +\N 16 16 \N \N \N +17 17 17 17 17 17 +\N \N 18 \N \N \N +19 19 19 19 19 19 +\N 20 20 \N \N \N +-- +\N \N \N 2 2 2 +\N \N \N 3 \N 3 +\N \N \N 6 \N 6 +\N \N \N 9 \N 9 +\N \N \N 10 10 10 +\N \N \N \N \N 12 +\N \N \N 14 14 14 +\N \N \N 15 \N 15 +\N \N \N 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N \N +3 3 3 \N \N \N +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N \N +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N \N +\N 10 10 \N \N \N +11 11 11 11 11 11 +\N \N 12 \N \N \N +13 13 13 13 13 13 +\N 14 14 \N \N \N +15 15 15 \N \N \N +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N \N +19 19 19 19 19 19 +\N 20 20 \N 20 20 +join_algorithm = default, join_use_nulls = 0, t1 JOIN t3 +-- +\N 0 2 2 +\N 0 6 6 +\N 0 10 10 +\N 0 14 14 +\N 0 18 18 +1 1 1 1 +\N 2 \N 4 +\N 2 \N 8 +\N 2 \N 12 +\N 2 \N 16 +\N 2 \N 20 +3 3 3 3 +\N 4 \N 4 +\N 4 \N 8 +\N 4 \N 12 +\N 4 \N 16 +\N 4 \N 20 +5 5 5 5 +\N 6 \N 4 +\N 6 \N 8 +\N 6 \N 12 +\N 6 \N 16 +\N 6 \N 20 +7 7 7 7 +\N 8 \N 4 +\N 8 \N 8 +\N 8 \N 12 +\N 8 \N 16 +\N 8 \N 20 +9 9 9 9 +\N 10 \N 4 +\N 10 \N 8 +\N 10 \N 12 +\N 10 \N 16 +\N 10 \N 20 +11 11 11 11 +\N 12 \N 4 +\N 12 \N 8 +\N 12 \N 12 +\N 12 \N 16 +\N 12 \N 20 +13 13 13 13 +\N 14 \N 4 +\N 14 \N 8 +\N 14 \N 12 +\N 14 \N 16 +\N 14 \N 20 +15 15 15 15 +\N 16 \N 4 +\N 16 \N 8 +\N 16 \N 12 +\N 16 \N 16 +\N 16 \N 20 +17 17 17 17 +\N 18 \N 4 +\N 18 \N 8 +\N 18 \N 12 +\N 18 \N 16 +\N 18 \N 20 +19 19 19 19 +\N 20 \N 4 +\N 20 \N 8 +\N 20 \N 12 +\N 20 \N 16 +\N 20 \N 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 6 0 6 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 \N 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 18 0 18 +1 1 1 1 1 1 +\N 2 2 \N 0 0 +3 3 3 \N 0 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N 0 0 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N 0 0 +\N 10 10 \N 0 0 +11 11 11 11 11 11 +\N \N 12 \N 0 0 +13 13 13 13 13 13 +\N 14 14 \N 0 0 +15 15 15 \N 0 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N 0 0 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 \N 4 4 +\N \N 0 6 0 6 +\N \N 0 \N 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 \N 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 \N 16 16 +\N \N 0 18 0 18 +\N \N 0 \N 20 20 +1 1 1 1 1 1 +\N 2 2 \N 0 0 +3 3 3 \N 0 0 +\N 4 4 \N 0 0 +5 5 5 5 5 5 +\N \N 6 \N 0 0 +7 7 7 7 7 7 +\N 8 8 \N 0 0 +9 9 9 \N 0 0 +\N 10 10 \N 0 0 +11 11 11 11 11 11 +\N \N 12 \N 0 0 +13 13 13 13 13 13 +\N 14 14 \N 0 0 +15 15 15 \N 0 0 +\N 16 16 \N 0 0 +17 17 17 17 17 17 +\N \N 
18 \N 0 0 +19 19 19 19 19 19 +\N 20 20 \N 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 6 0 6 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 \N 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 18 0 18 +1 1 1 1 1 1 +\N 2 2 \N 0 0 +3 3 3 \N 0 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N 0 0 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N 0 0 +\N 10 10 \N 0 0 +11 11 11 11 11 11 +\N \N 12 \N 0 0 +13 13 13 13 13 13 +\N 14 14 \N 0 0 +15 15 15 \N 0 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N 0 0 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +join_algorithm = default, join_use_nulls = 0, t1 JOIN t4 +-- +\N 0 2 2 +\N 0 0 4 +\N 0 6 6 +\N 0 0 8 +\N 0 10 10 +\N 0 0 12 +\N 0 14 14 +\N 0 0 16 +\N 0 18 18 +\N 0 0 20 +1 1 1 1 +\N 2 0 0 +3 3 3 3 +\N 4 0 0 +5 5 5 5 +\N 6 0 0 +7 7 7 7 +\N 8 0 0 +9 9 9 9 +\N 10 0 0 +11 11 11 11 +\N 12 0 0 +13 13 13 13 +\N 14 0 0 +15 15 15 15 +\N 16 0 0 +17 17 17 17 +\N 18 0 0 +19 19 19 19 +\N 20 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 0 4 4 +\N \N 0 6 0 6 +\N \N 0 0 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 0 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 0 16 16 +\N \N 0 18 0 18 +\N \N 0 0 20 20 +1 1 1 1 1 1 +\N 2 2 0 0 0 +3 3 3 0 0 0 +\N 4 4 0 0 0 +5 5 5 5 5 5 +\N \N 6 0 0 0 +7 7 7 7 7 7 +\N 8 8 0 0 0 +9 9 9 0 0 0 +\N 10 10 0 0 0 +11 11 11 11 11 11 +\N \N 12 0 0 0 +13 13 13 13 13 13 +\N 14 14 0 0 0 +15 15 15 0 0 0 +\N 16 16 0 0 0 +17 17 17 17 17 17 +\N \N 18 0 0 0 +19 19 19 19 19 19 +\N 20 20 0 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 0 4 4 +\N \N 0 6 0 6 +\N \N 0 0 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 0 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 0 16 16 +\N \N 0 18 0 18 +\N \N 0 0 20 20 +1 1 1 1 1 1 +\N 2 2 0 0 0 +3 3 3 0 0 0 +\N 4 4 0 0 0 +5 5 5 5 5 5 +\N \N 6 0 0 0 +7 7 7 7 7 7 +\N 8 8 0 0 0 +9 9 9 0 0 0 +\N 10 10 0 0 0 +11 11 11 11 11 11 +\N \N 12 0 0 0 +13 13 13 13 13 13 +\N 14 14 0 0 0 +15 15 15 0 0 0 +\N 16 16 0 0 0 +17 17 17 17 17 17 +\N \N 18 0 0 0 +19 19 19 19 19 19 +\N 20 20 0 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 0 4 4 +\N \N 0 6 0 6 +\N \N 0 0 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 0 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 0 16 16 +\N \N 0 18 0 18 +\N \N 0 0 20 20 +1 1 1 1 1 1 +\N 2 2 0 0 0 +3 3 3 0 0 0 +\N 4 4 0 0 0 +5 5 5 5 5 5 +\N \N 6 0 0 0 +7 7 7 7 7 7 +\N 8 8 0 0 0 +9 9 9 0 0 0 +\N 10 10 0 0 0 +11 11 11 11 11 11 +\N \N 12 0 0 0 +13 13 13 13 13 13 +\N 14 14 0 0 0 +15 15 15 0 0 0 +\N 16 16 0 0 0 +17 17 17 17 17 17 +\N \N 18 0 0 0 +19 19 19 19 19 19 +\N 20 20 0 0 0 +-- +\N 0 2 2 +\N 0 \N 4 +\N 0 6 6 +\N 0 \N 8 +\N 0 10 10 +\N 0 \N 12 +\N 0 14 14 +\N 0 \N 16 +\N 0 18 18 +\N 0 \N 20 +1 1 1 1 +\N 2 \N 0 +3 3 3 3 +\N 4 \N 0 +5 5 5 5 +\N 6 \N 0 +7 7 7 7 +\N 8 \N 0 +9 9 9 9 +\N 10 \N 0 +11 11 11 11 +\N 12 \N 0 +13 13 13 13 +\N 14 \N 0 +15 15 15 15 +\N 16 \N 0 +17 17 17 17 +\N 18 \N 0 +19 19 19 19 +\N 20 \N 0 +-- +1 42 420 1 1 43 430 1 +\N 42 420 2 \N 43 430 4 +\N 42 420 2 \N 43 430 8 +\N 42 420 2 \N 43 430 12 +\N 42 420 2 \N 43 430 16 +\N 42 420 2 \N 43 430 20 +3 42 420 3 3 43 430 3 +\N 42 420 4 \N 43 430 4 +\N 42 420 4 \N 43 430 8 +\N 42 420 4 \N 43 430 12 +-- +1 42 420 1 1 43 430 1 +\N 42 420 2 \N 43 430 4 +\N 42 420 2 \N 43 430 8 +\N 42 420 2 \N 43 430 12 +\N 42 420 2 \N 43 430 16 +\N 42 420 2 \N 43 430 20 +3 42 420 3 3 43 430 3 +\N 42 420 4 \N 43 430 4 +\N 42 420 4 \N 43 430 8 +\N 42 420 4 \N 43 430 12 +-- diff --git a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 new file mode 100644 index 00000000000..64960d2b2e5 --- /dev/null +++ 
b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 @@ -0,0 +1,101 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (a Nullable(UInt32), b Nullable(Int16), val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t1 SELECT if(number % 2 == 0, NULL, number), if(number % 6 == 0, NULL, number), number, FROM numbers(1, 20); + +CREATE TABLE t2 (a Nullable(UInt32), b Nullable(UInt16), val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t2 SELECT if(number % 4 == 0, NULL, number), if(number % 3 == 0, NULL, number), number, FROM numbers(1, 20); + +CREATE TABLE t3 (a Nullable(UInt32), b UInt16, val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t3 SELECT if(number % 4 == 0, NULL, number), if(number % 3 == 0, NULL, number), number, FROM numbers(1, 20); + +CREATE TABLE t4 (a UInt32, b UInt16, val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t4 SELECT if(number % 4 == 0, NULL, number), if(number % 3 == 0, NULL, number), number, FROM numbers(1, 20); + +{% for join_algorithm, join_use_nulls, t1, t2 in [ + ('default', 0, 't1', 't2'), + ('grace_hash', 0, 't1', 't2'), + ('full_sorting_merge', 0, 't1', 't2'), + ('default', 1, 't1', 't2'), + ('default', 0, 't1', 't3'), + ('default', 0, 't1', 't4'), +] -%} + +SET join_algorithm = '{{ join_algorithm }}'; +SET join_use_nulls = {{ join_use_nulls }}; + +SELECT 'join_algorithm = {{ join_algorithm }}, join_use_nulls = {{ join_use_nulls }}, {{ t1 }} JOIN {{ t2 }}'; + +SELECT '--'; + +SELECT {{ t1 }}.a, {{ t1 }}.val, {{ t2 }}.a, {{ t2 }}.val FROM {{ t1 }} FULL JOIN {{ t2 }} +ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +SELECT '--'; + +SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} +ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b) +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +SELECT '--'; + +SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} +ON {{ t1 }}.a == {{ t2 }}.a AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b) +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +SELECT '--'; + +SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} +ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND {{ t1 }}.b == {{ t2 }}.b +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +{% endfor -%} + +SELECT '--'; + +SET join_use_nulls = 0; +SET join_algorithm = 'hash'; +SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2 +ON isNotDistinctFrom(t1.a, t2.a) AND t1.b < 2 OR t1.a == t2.a +ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST +; + +SELECT '--'; + +SET join_algorithm = 'default'; +SET join_use_nulls = 1; + +-- try to cause column name clash intentionally using internal name + +SELECT * +FROM (SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val FROM t1) t1 +JOIN (SELECT a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, val FROM t2) t2 +ON isNotDistinctFrom(t1.a, t2.a) +ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST +LIMIT 10; + +SELECT '--'; + +SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val, t2.a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, t2.val +FROM (SELECT a, val, 111 as `__wrapNullsafe(a)_0` FROM t1) t1 +JOIN (SELECT a, val, 111 as `__wrapNullsafe(t2.a)_0` FROM t2) t2 +ON isNotDistinctFrom(t1.a, t2.a) +ORDER BY t1.val 
NULLS FIRST, t2.val NULLS FIRST +LIMIT 10; + +SELECT '--'; + +-- check illegal queries + +SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(); -- { serverError SYNTAX_ERROR,NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a); -- { serverError SYNTAX_ERROR,NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a, t2.a, t2.b); -- { serverError SYNTAX_ERROR,NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT isNotDistinctFrom(a) from t1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT isNotDistinctFrom(a, b) from t1; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference new file mode 100644 index 00000000000..9c9caa22139 --- /dev/null +++ b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference @@ -0,0 +1,13 @@ +== Only last version remains after OPTIMIZE W/ CLEANUP == +d1 5 0 +d2 1 0 +d3 1 0 +d4 1 0 +d5 1 0 +d6 3 0 +== OPTIMIZE W/ CLEANUP (remove d6) == +d1 5 0 +d2 1 0 +d3 1 0 +d4 1 0 +d5 1 0 diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql new file mode 100644 index 00000000000..7b78e2900e7 --- /dev/null +++ b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 0, + min_rows_for_wide_part = 1, + min_bytes_for_wide_part = 1; + +-- Expect d6 to be version=3 is_deleted=false +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); +-- Insert previous version of 'd6' but only v=3 is_deleted=false will remain +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1); +SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP =='; +OPTIMIZE TABLE test FINAL CLEANUP; +select * from test order by uid; + +-- insert d6 v=3 is_deleted=true (timestamp more recent so this version should be the one taken into account) +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1); + +SELECT '== OPTIMIZE W/ CLEANUP (remove d6) =='; +OPTIMIZE TABLE test FINAL CLEANUP; +-- No d6 anymore +select * from test order by uid; + +DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02861_uuid_format_serialization.reference b/tests/queries/0_stateless/02861_uuid_format_serialization.reference new file mode 100644 index 00000000000..4c6b4cd21e8 Binary files /dev/null and b/tests/queries/0_stateless/02861_uuid_format_serialization.reference differ diff --git a/tests/queries/0_stateless/02861_uuid_format_serialization.sql b/tests/queries/0_stateless/02861_uuid_format_serialization.sql new file mode 100644 index 00000000000..e73ef2d5197 --- /dev/null +++ b/tests/queries/0_stateless/02861_uuid_format_serialization.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS t_uuid; +CREATE TABLE t_uuid (x UUID)
ENGINE=MergeTree ORDER BY x; + +INSERT INTO t_uuid VALUES ('61f0c404-5cb3-11e7-907b-a6006ad3dba0'), ('992f6910-42b2-43cd-98bc-c812fbf9b683'), ('417ddc5d-e556-4d27-95dd-a34d84e46a50'); + +SELECT * FROM t_uuid ORDER BY x LIMIT 1 FORMAT RowBinary; +SELECT * FROM t_uuid ORDER BY x FORMAT RowBinary; + +DROP TABLE IF EXISTS t_uuid; diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference b/tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql b/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql new file mode 100644 index 00000000000..0678023f2f4 --- /dev/null +++ b/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/52019 +DROP TABLE IF EXISTS tab; +SET allow_experimental_inverted_index=1; +CREATE TABLE tab (`k` UInt64, `s` Map(String, String), INDEX af mapKeys(s) TYPE inverted(2) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; +INSERT INTO tab (k) VALUES (0); +SELECT * FROM tab PREWHERE (s[NULL]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM tab PREWHERE (s[1]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT * FROM tab PREWHERE (s['foo']) = 'Click a03' SETTINGS allow_experimental_analyzer=1; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02862_sorted_distinct_sparse_fix.reference b/tests/queries/0_stateless/02862_sorted_distinct_sparse_fix.reference new file mode 100644 index 00000000000..3a17878aea7 --- /dev/null +++ b/tests/queries/0_stateless/02862_sorted_distinct_sparse_fix.reference @@ -0,0 +1,13 @@ +-- { echoOn } +SELECT name, column, serialization_kind +FROM system.parts_columns +WHERE table = 't_sparse_distinct' AND database = currentDatabase() AND column = 'v' +ORDER BY name; +all_1_1_0 v Default +all_2_2_0 v Sparse +set optimize_distinct_in_order=1; +set max_threads=1; +select trimLeft(explain) from (explain pipeline SELECT DISTINCT id, v FROM t_sparse_distinct) where explain ilike '%DistinctSortedChunkTransform%'; +DistinctSortedChunkTransform +SELECT DISTINCT id, v FROM t_sparse_distinct format Null; +DROP TABLE t_sparse_distinct; diff --git a/tests/queries/0_stateless/02862_sorted_distinct_sparse_fix.sql b/tests/queries/0_stateless/02862_sorted_distinct_sparse_fix.sql new file mode 100644 index 00000000000..2bcdb3d43ff --- /dev/null +++ b/tests/queries/0_stateless/02862_sorted_distinct_sparse_fix.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS t_sparse_distinct; + +CREATE TABLE t_sparse_distinct (id UInt32, v String) +ENGINE = MergeTree +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +SYSTEM STOP MERGES t_sparse_distinct; + +INSERT INTO t_sparse_distinct SELECT number % 10, toString(number % 100 = 0) FROM numbers(100); +INSERT INTO t_sparse_distinct(id) SELECT number % 10 FROM numbers(100); + +-- { echoOn } +SELECT name, column, serialization_kind +FROM system.parts_columns +WHERE table = 't_sparse_distinct' AND database = currentDatabase() AND column = 'v' +ORDER BY name; + +set optimize_distinct_in_order=1; +set max_threads=1; + +select trimLeft(explain) from (explain pipeline SELECT DISTINCT id, v FROM t_sparse_distinct) where explain ilike '%DistinctSortedChunkTransform%'; +SELECT 
DISTINCT id, v FROM t_sparse_distinct format Null; + +DROP TABLE t_sparse_distinct; diff --git a/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.reference b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.reference new file mode 100644 index 00000000000..a874ad9ebc5 --- /dev/null +++ b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.reference @@ -0,0 +1,5 @@ +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +403229640000000000 6.034192082918747e163 +-25 4583 1555239399 7057356139103719911 -148231516101255056243829344033567469081 192050850819683407219545263398200742375 +231 4583 1555239399 7057356139103719911 192050850819683407219545263398200742375 192050850819683407219545263398200742375 +00000000-5cb3-11e7-0000-000000000000 diff --git a/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.sql b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.sql new file mode 100644 index 00000000000..d6369835f04 --- /dev/null +++ b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_uuid; +CREATE TABLE t_uuid (x UUID) ENGINE=MergeTree ORDER BY x; + +INSERT INTO t_uuid VALUES ('61f0c404-5cb3-11e7-907b-a6006ad3dba0'); + +SELECT reinterpretAsUUID(x) FROM t_uuid; +SELECT reinterpretAsFloat32(x), reinterpretAsFloat64(x) FROM t_uuid; +SELECT reinterpretAsInt8(x), reinterpretAsInt16(x), reinterpretAsInt32(x), reinterpretAsInt64(x), reinterpretAsInt128(x), reinterpretAsInt256(x) FROM t_uuid; +SELECT reinterpretAsUInt8(x), reinterpretAsUInt16(x), reinterpretAsUInt32(x), reinterpretAsUInt64(x), reinterpretAsUInt128(x), reinterpretAsUInt256(x) FROM t_uuid; + +SELECT reinterpretAsUUID(reinterpretAsUInt128(reinterpretAsUInt32(reinterpretAsUInt256(x)))) FROM t_uuid; + +DROP TABLE IF EXISTS t_uuid; diff --git a/tests/queries/0_stateless/02863_decode_html_component.reference b/tests/queries/0_stateless/02863_decode_html_component.reference new file mode 100644 index 00000000000..3749c14591a --- /dev/null +++ b/tests/queries/0_stateless/02863_decode_html_component.reference @@ -0,0 +1,22 @@ +Hello, "world"! 
+<123> +&clickhouse +\'foo\' +Hello, && world +Hello, &;& world +Hello, &a;& world +Hello, <t;& world +Hello, <t& world +Hello, &t;& world + !"#$%&\'()*+,-./012 +)*+,-./0123456789:;< +=>?@ABCDEFGHIJKLMNOP +为 +为 +�\'123 +ЦЦЮЮЫㄱ +C𝓁𝒾𝒸𝓀𝐻𝑜𝓊𝓈𝑒 +C𝓁𝒾𝒸𝓀𝐻𝑜𝓊𝓈𝑒 +C𝓁𝒾𝒸𝓀𝐻𝑜𝓊𝓈𝑒{ + +C diff --git a/tests/queries/0_stateless/02863_decode_html_component.sql b/tests/queries/0_stateless/02863_decode_html_component.sql new file mode 100644 index 00000000000..0eb4653e38b --- /dev/null +++ b/tests/queries/0_stateless/02863_decode_html_component.sql @@ -0,0 +1,24 @@ +SELECT decodeHTMLComponent('Hello, "world"!'); +SELECT decodeHTMLComponent('<123>'); +SELECT decodeHTMLComponent('&clickhouse'); +SELECT decodeHTMLComponent(''foo''); +SELECT decodeHTMLComponent('Hello, && world'); +SELECT decodeHTMLComponent('Hello, &;& world'); +SELECT decodeHTMLComponent('Hello, &a;& world'); +SELECT decodeHTMLComponent('Hello, <t;& world'); +SELECT decodeHTMLComponent('Hello, <t& world'); +SELECT decodeHTMLComponent('Hello, &t;& world'); + +SELECT decodeHTMLComponent(' !"#$%&'()*+,-./012'); +SELECT decodeHTMLComponent(')*+,-./0123456789:;<'); +SELECT decodeHTMLComponent('=>?@ABCDEFGHIJKLMNOP'); +SELECT decodeHTMLComponent('为'); +SELECT decodeHTMLComponent('为'); +SELECT decodeHTMLComponent('�'123'); +SELECT decodeHTMLComponent('ЦЦЮЮЫㄱ'); +SELECT decodeHTMLComponent('C𝓁𝒾𝒸𝓀𝐻𝑜𝓊𝓈𝑒'); +SELECT decodeHTMLComponent('C𝓁𝒾𝒸𝓀𝐻𝑜𝓊𝓈𝑒'); +SELECT decodeHTMLComponent('C𝓁𝒾𝒸𝓀𝐻𝑜𝓊𝓈𝑒{'); +SELECT decodeHTMLComponent(''); +SELECT decodeHTMLComponent('C'); + diff --git a/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.reference b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.reference new file mode 100644 index 00000000000..6ef9d9b6eeb --- /dev/null +++ b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.reference @@ -0,0 +1,27 @@ +3 + +3 + +3 +3 +1 +{ + "meta": + [ + { + "name": "sum(a)", + "type": "Int64" + } + ], + + "data": + [ + { + "sum(a)": "1" + } + ], + + "rows": 1, + + "rows_before_limit_at_least": 2 +} diff --git a/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.sql b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.sql new file mode 100644 index 00000000000..9269df8b51e --- /dev/null +++ b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.sql @@ -0,0 +1,17 @@ +-- Tags: no-parallel +-- Tag no-parallel: failpoint is used which can force DelayedSource on other tests + +DROP TABLE IF EXISTS 02863_delayed_source; + +CREATE TABLE 02863_delayed_source(a Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02863_delayed_source/{replica}', 'r1') ORDER BY a; +INSERT INTO 02863_delayed_source VALUES (1), (2); + +SYSTEM ENABLE FAILPOINT use_delayed_remote_source; + +SELECT sum(a) FROM remote('127.0.0.4', currentDatabase(), '02863_delayed_source') WITH TOTALS SETTINGS extremes = 1; +SELECT max(explain like '%Delayed%') FROM (EXPLAIN PIPELINE graph=1 SELECT sum(a) FROM remote('127.0.0.4', currentDatabase(), '02863_delayed_source') WITH TOTALS SETTINGS extremes = 1); +SELECT sum(a) FROM remote('127.0.0.4', currentDatabase(), '02863_delayed_source') GROUP BY a ORDER BY a LIMIT 1 FORMAT JSON settings output_format_write_statistics=0; + +SYSTEM DISABLE FAILPOINT use_delayed_remote_source; + +DROP TABLE 02863_delayed_source; \ No newline at end of file diff --git a/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.reference 
b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.reference new file mode 100644 index 00000000000..6e82dd5d023 --- /dev/null +++ b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.child\n(\n `id` Int32,\n `pid` Int32\n)\nENGINE = MergeTree\nPRIMARY KEY id\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.child2\n(\n `id` Int32,\n `pid` Int32\n)\nENGINE = MergeTree\nPRIMARY KEY id\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.child3\n(\n `id` Int32,\n `pid` Int32\n)\nENGINE = MergeTree\nPRIMARY KEY id\nORDER BY id\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.sql b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.sql new file mode 100644 index 00000000000..2d814643116 --- /dev/null +++ b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.sql @@ -0,0 +1,29 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/53380 + + +drop table if exists parent; +drop table if exists child; + +create table parent (id int, primary key(id)) engine MergeTree; +create table child (id int, pid int, primary key(id), foreign key(pid)) engine MergeTree; -- { clientError SYNTAX_ERROR } +create table child (id int, pid int, primary key(id), foreign key(pid) references) engine MergeTree; -- { clientError SYNTAX_ERROR } +create table child (id int, pid int, primary key(id), foreign key(pid) references parent(pid)) engine MergeTree; + +show create table child; + +create table child2 (id int, pid int, primary key(id), + foreign key(pid) references parent(pid) on delete) engine MergeTree; -- { clientError SYNTAX_ERROR } +create table child2 (id int, pid int, primary key(id), + foreign key(pid) references parent(pid) on delete cascade) engine MergeTree; + +show create table child2; + +create table child3 (id int, pid int, primary key(id), + foreign key(pid) references parent(pid) on delete cascade on update restrict) engine MergeTree; + +show create table child3; + +drop table child3; +drop table child2; +drop table child; +drop table parent; \ No newline at end of file diff --git a/tests/queries/0_stateless/02863_interpolate_subquery.reference b/tests/queries/0_stateless/02863_interpolate_subquery.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02863_interpolate_subquery.sql b/tests/queries/0_stateless/02863_interpolate_subquery.sql new file mode 100644 index 00000000000..4d8ba5f9cb2 --- /dev/null +++ b/tests/queries/0_stateless/02863_interpolate_subquery.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/53640 +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (i UInt32, a UInt32) ENGINE=Memory; +SELECT i, col1 FROM ( + SELECT i, a AS col1, a AS col2 FROM tab ORDER BY i WITH FILL INTERPOLATE (col1 AS col1+col2, col2) +); +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.reference b/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.sql b/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.sql new file mode 100644 index 00000000000..4c30795758e --- /dev/null +++ 
b/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.sql @@ -0,0 +1,10 @@ +drop table if exists tab; +create table tab (x UInt32, y UInt32) engine = MergeTree order by x; + +insert into tab select number, number from numbers(10); +insert into tab select number, number from numbers(20); + +set mutations_sync=2; + +alter table tab delete where x > 1000 and y in (select sum(number + 1) from numbers_mt(1e7) group by number % 2 with totals); +drop table if exists tab; diff --git a/tests/queries/0_stateless/02863_non_const_timezone_check.reference b/tests/queries/0_stateless/02863_non_const_timezone_check.reference new file mode 100644 index 00000000000..7efacab94d3 --- /dev/null +++ b/tests/queries/0_stateless/02863_non_const_timezone_check.reference @@ -0,0 +1,20 @@ +173000 +083000 +173000 +083000 +173000 +173000 +083000 +173000 +083000 +173000 +2023-08-25 17:30:00 +2023-08-25 08:30:00 +2023-08-25 17:30:00 +2023-08-25 08:30:00 +2023-08-25 17:30:00 +2023-08-25 17:30:00 +2023-08-25 08:30:00 +2023-08-25 17:30:00 +2023-08-25 08:30:00 +2023-08-25 17:30:00 diff --git a/tests/queries/0_stateless/02863_non_const_timezone_check.sql b/tests/queries/0_stateless/02863_non_const_timezone_check.sql new file mode 100644 index 00000000000..4cb5457ae80 --- /dev/null +++ b/tests/queries/0_stateless/02863_non_const_timezone_check.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS Dates; + +CREATE TABLE Dates (date DateTime('UTC')) ENGINE = MergeTree() ORDER BY date; + +INSERT INTO Dates VALUES ('2023-08-25 15:30:00'); + +SELECT formatDateTime((SELECT date FROM Dates), '%H%i%S', number % 2 ? 'America/Los_Angeles' : 'Europe/Amsterdam') FROM numbers(5); + +SELECT formatDateTime((SELECT materialize(date) FROM Dates), '%H%i%S', number % 2 ? 'America/Los_Angeles' : 'Europe/Amsterdam') FROM numbers(5); + +SELECT formatDateTime((SELECT materialize(date) FROM Dates), '%H%i%S', number % 2 ? '' : 'Europe/Amsterdam') FROM numbers(5); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +SELECT toString((SELECT date FROM Dates), number % 2 ? 'America/Los_Angeles' : 'Europe/Amsterdam') FROM numbers(5); + +SELECT toString((SELECT materialize(date) FROM Dates), number % 2 ? 'America/Los_Angeles' : 'Europe/Amsterdam') FROM numbers(5); + +SELECT toString((SELECT materialize(date) FROM Dates), number % 2 ? 
'America/Los_Angeles' : '') FROM numbers(5); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +DROP TABLE Dates; diff --git a/tests/queries/0_stateless/02864_filtered_url_with_globs.reference b/tests/queries/0_stateless/02864_filtered_url_with_globs.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02864_filtered_url_with_globs.sql b/tests/queries/0_stateless/02864_filtered_url_with_globs.sql new file mode 100644 index 00000000000..e952f63af04 --- /dev/null +++ b/tests/queries/0_stateless/02864_filtered_url_with_globs.sql @@ -0,0 +1,3 @@ +SELECT * FROM url('http://127.0.0.1:8123?query=select+{1,2}+as+x+format+TSV', 'TSV') WHERE 0; +SELECT _path FROM url('http://127.0.0.1:8123?query=select+{1,2}+as+x+format+TSV', 'TSV') WHERE 0; + diff --git a/tests/queries/0_stateless/02864_profile_event_part_lock.reference b/tests/queries/0_stateless/02864_profile_event_part_lock.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02864_profile_event_part_lock.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02864_profile_event_part_lock.sql b/tests/queries/0_stateless/02864_profile_event_part_lock.sql new file mode 100644 index 00000000000..2b2ac7b5512 --- /dev/null +++ b/tests/queries/0_stateless/02864_profile_event_part_lock.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS random_mt; + +CREATE TABLE random_mt +( + key UInt64, + value String +) +ENGINE MergeTree() +ORDER BY tuple(); + +INSERT INTO random_mt VALUES (1, 'Hello'); + +SELECT any(value > 0) from system.events WHERE event = 'PartsLockHoldMicroseconds' or event = 'PartsLockWaitMicroseconds'; + +DROP TABLE IF EXISTS random_mt; + diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference new file mode 100644 index 00000000000..0b4c65a3a45 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference @@ -0,0 +1,4 @@ +REPLACE empty partition with duplicated parts +0 +4 8 +4 8 diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh new file mode 100755 index 00000000000..edfed206d87 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-s3-storage + +# Because REPLACE PARTITION does not force immediate removal of replaced data parts from the local filesystem +# (it tries to do it as quickly as possible, but it is still performed asynchronously in a separate thread), +# and when we do DETACH TABLE / ATTACH TABLE or SYSTEM RESTART REPLICA, these files may be discovered +# and discarded after restart with Warning/Error messages in the log. This is OK. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r1 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '1') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r2 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '2') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1), (1, '0', 3);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1), (1, '0', 3);" + +$CLICKHOUSE_CLIENT --query="SELECT 'REPLACE empty partition with duplicated parts';" +$CLICKHOUSE_CLIENT --query="ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" +echo $? + +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.reference b/tests/queries/0_stateless/02864_restore_table_with_broken_part.reference new file mode 100644 index 00000000000..9a8dcda81df --- /dev/null +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.reference @@ -0,0 +1,5 @@ +data.bin doesn't exist: while restoring part all_2_2_0 +RESTORED +1 +3 +broken-from-backup_all_2_2_0 broken-from-backup diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh new file mode 100755 index 00000000000..cf99c7e9284 --- /dev/null +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Copy backups/with_broken_part.zip into the disk named "backups". +SRC_BACKUP_DIR=$CURDIR/backups +SRC_BACKUP_FILENAME=with_broken_part.zip + +BACKUPS_DISK=backups +BACKUPS_DIR=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='$BACKUPS_DISK'") + +if [ -z "$BACKUPS_DIR" ]; then + echo Disk \'$BACKUPS_DISK\' not found + exit 1 +fi + +BACKUP_FILENAME=$CLICKHOUSE_DATABASE/${SRC_BACKUP_FILENAME} +BACKUP_NAME="Disk('$BACKUPS_DISK', '$BACKUP_FILENAME')" + +mkdir -p "$(dirname "$BACKUPS_DIR/$BACKUP_FILENAME")" +ln -s "$SRC_BACKUP_DIR/$SRC_BACKUP_FILENAME" "$BACKUPS_DIR/$BACKUP_FILENAME" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" + +# First try to restore with the setting `restore_broken_parts_as_detached` set to false. 
+$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME" 2>&1 | grep -o -m 1 "data.bin doesn't exist: while restoring part all_2_2_0" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" + +# Then try to restore with the setting `restore_broken_parts_as_detached` set to true. +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' + +$CLICKHOUSE_CLIENT --multiquery < 1) == 1; +EXPLAIN AST SELECT 1 <=> 1 == 1; + +-- EXPLAIN AST SELECT (1 == 1) <=> 1; +EXPLAIN AST SELECT 1 == 1 <=> 1; diff --git a/tests/queries/0_stateless/02868_select_support_from_keywords.reference b/tests/queries/0_stateless/02868_select_support_from_keywords.reference new file mode 100644 index 00000000000..d2dcb047cf0 --- /dev/null +++ b/tests/queries/0_stateless/02868_select_support_from_keywords.reference @@ -0,0 +1 @@ +CREATE VIEW default.test_view\n(\n `date` Date,\n `__sign` Int8,\n `from` Float64,\n `to` Float64\n) AS\nWITH cte AS\n (\n SELECT\n date,\n __sign,\n from,\n to\n FROM default.test_table\n FINAL\n )\nSELECT\n date,\n __sign,\n from,\n to\nFROM cte diff --git a/tests/queries/0_stateless/02868_select_support_from_keywords.sql b/tests/queries/0_stateless/02868_select_support_from_keywords.sql new file mode 100644 index 00000000000..dc06651a8eb --- /dev/null +++ b/tests/queries/0_stateless/02868_select_support_from_keywords.sql @@ -0,0 +1,5 @@ +create table test_table ( `date` Date, `__sign` Int8, `from` Float64, `to` Float64 ) ENGINE = CollapsingMergeTree(__sign) PARTITION BY toYYYYMM(date) ORDER BY (date) SETTINGS index_granularity = 8192; +create VIEW test_view AS WITH cte AS (SELECT date, __sign, "from", "to" FROM test_table FINAL) SELECT date, __sign, "from", "to" FROM cte; +show create table test_view; +drop table test_view; +drop table test_table; diff --git a/tests/queries/0_stateless/02869_gcd_codec_test_incorrect_type.reference b/tests/queries/0_stateless/02869_gcd_codec_test_incorrect_type.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02869_gcd_codec_test_incorrect_type.sql b/tests/queries/0_stateless/02869_gcd_codec_test_incorrect_type.sql new file mode 100644 index 00000000000..61a964a288f --- /dev/null +++ b/tests/queries/0_stateless/02869_gcd_codec_test_incorrect_type.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS table_gcd_codec; +CREATE TABLE table_gcd_codec (str String CODEC(GCD, LZ4)) ENGINE = Memory; -- { serverError 36 } diff --git a/tests/queries/0_stateless/02869_http_headers_elapsed_ns.reference b/tests/queries/0_stateless/02869_http_headers_elapsed_ns.reference new file mode 100644 index 00000000000..f89715e69fd --- /dev/null +++ b/tests/queries/0_stateless/02869_http_headers_elapsed_ns.reference @@ -0,0 +1,2 @@ +elapsed_ns in progress are all non zero +elapsed_ns in summary is not zero diff --git a/tests/queries/0_stateless/02869_http_headers_elapsed_ns.sh b/tests/queries/0_stateless/02869_http_headers_elapsed_ns.sh new file mode 100755 index 00000000000..df6302bb82a --- /dev/null +++ b/tests/queries/0_stateless/02869_http_headers_elapsed_ns.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +CURL_OUTPUT=$(echo 'SELECT number FROM numbers(10)' | \ + ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1) + +ELAPSED_NS_PROGRESS="$(echo "${CURL_OUTPUT}" | \ + grep 'X-ClickHouse-Progress' | \ + awk '{print $3}' | \ + jq -cM '.elapsed_ns | tonumber' + )" + +ELAPSED_NS_SUMMARY="$(echo "${CURL_OUTPUT}" | \ + grep 'X-ClickHouse-Summary' | \ + awk '{print $3}' | \ + jq -cM '.elapsed_ns | tonumber' + )" + + +ALL_ARE_NON_ZERO=1 +while read -r line; do + if [ "$line" -eq 0 ]; then + ALL_ARE_NON_ZERO=0 + break + fi +done <<< "$ELAPSED_NS_PROGRESS" + +if [ "$ALL_ARE_NON_ZERO" -eq 1 ] && [ "$(echo "$ELAPSED_NS_SUMMARY" | wc -l)" -gt 0 ]; then + echo "elapsed_ns in progress are all non zero" +else + echo "elapsed_ns in progress are all zero!" +fi + +if [ "$ELAPSED_NS_SUMMARY" -ne 0 ]; +then + echo "elapsed_ns in summary is not zero" +else + echo "elapsed_ns in summary is zero!" +fi diff --git a/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.reference b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.reference new file mode 100644 index 00000000000..54a12c21822 --- /dev/null +++ b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.reference @@ -0,0 +1 @@ +9000 0 8999 4499.5 diff --git a/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql new file mode 100644 index 00000000000..9559b46fa08 --- /dev/null +++ b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS t1 SYNC; +DROP TABLE IF EXISTS t2 SYNC; +DROP TABLE IF EXISTS t3 SYNC; + +CREATE TABLE t1(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k settings index_granularity=10; +CREATE TABLE t2(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k settings index_granularity=10; +CREATE TABLE t3(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k settings index_granularity=10; + +insert into t1 select number, number from numbers(1000); +insert into t1 select number, number from numbers(1000, 1000); +insert into t1 select number, number from numbers(2000, 1000); + +insert into t2 select number, number from numbers(3000, 1000); +insert into t2 select number, number from numbers(4000, 1000); +insert into t2 select number, number from numbers(5000, 1000); + +insert into t3 select number, number from numbers(6000, 1000); +insert into t3 select number, number from numbers(7000, 1000); +insert into t3 select number, number from numbers(8000, 1000); + +system sync replica t1; +system sync replica t2; +system sync replica t3; + +SELECT count(), min(k), max(k), avg(k) +FROM t1 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, use_hedged_requests=0, + cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_single_task_marks_count_multiplier = 0.001; diff --git a/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.reference b/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.reference new file mode 100644 index 00000000000..49dfe275166 --- /dev/null +++ 
b/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.reference @@ -0,0 +1,3 @@ +default tuple() 1000000 +Alter 1 +s3_disk tuple() 1000000 diff --git a/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql b/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql new file mode 100644 index 00000000000..b03d9849a80 --- /dev/null +++ b/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql @@ -0,0 +1,12 @@ +-- Tags: no-random-merge-tree-settings, no-fasttest, no-replicated-database +-- Tag: no-fasttest -- requires S3 +-- Tag: no-replicated-database -- ALTER MOVE PARTITION TO should not be replicated (will be fixed separately) + +CREATE TABLE test_move_partition_throttling (key UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS storage_policy='local_remote'; +INSERT INTO test_move_partition_throttling SELECT number FROM numbers(1e6); +SELECT disk_name, partition, rows FROM system.parts WHERE database = currentDatabase() AND table = 'test_move_partition_throttling' and active; +ALTER TABLE test_move_partition_throttling MOVE PARTITION tuple() TO VOLUME 'remote' SETTINGS max_remote_write_network_bandwidth=1600000; +SYSTEM FLUSH LOGS; +-- The partition is ~8e6 bytes (1e6 UInt64 values), so at 1600000 bytes/sec the move should take at least (8e6-1600000)/1600000 = 4.0 seconds. +SELECT query_kind, query_duration_ms>4e3 FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query_kind = 'Alter'; +SELECT disk_name, partition, rows FROM system.parts WHERE database = currentDatabase() AND table = 'test_move_partition_throttling' and active; diff --git a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.reference b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.reference new file mode 100644 index 00000000000..7b36cc96f5e --- /dev/null +++ b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh new file mode 100755 index 00000000000..cc4ce9b122e --- /dev/null +++ b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# head prints 10 rows by default, but querying just 11 rows is not enough: we +# need to overflow the default pipe size, hence 1 million rows (around 6 MiB in +# text representation, which should definitely be enough).
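+# (Rough arithmetic, assuming the typical ~64 KiB pipe buffer on Linux: each row is at most
+# 7 bytes of text, so 1e6 rows is roughly 6.6 MiB, far more than the pipe can hold once
+# `head` exits after printing its first 10 rows.)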
+$CLICKHOUSE_CLIENT --ignore-error -nm --pager head -q " + select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR } + select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR } +" + +exit 0 diff --git a/tests/queries/0_stateless/02871_join_on_system_errors.reference b/tests/queries/0_stateless/02871_join_on_system_errors.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02871_join_on_system_errors.sql b/tests/queries/0_stateless/02871_join_on_system_errors.sql new file mode 100644 index 00000000000..ae30ef8f743 --- /dev/null +++ b/tests/queries/0_stateless/02871_join_on_system_errors.sql @@ -0,0 +1,13 @@ + +-- Unique table alias to distinguish between errors from different queries +SELECT * FROM (SELECT 1 as a) t +JOIN (SELECT 2 as a) `89467d35-77c2-4f82-ae7a-f093ff40f4cd` +ON t.a = `89467d35-77c2-4f82-ae7a-f093ff40f4cd`.a +; + +SELECT * +FROM system.errors +WHERE name = 'UNKNOWN_IDENTIFIER' +AND last_error_time > now() - 1 +AND last_error_message LIKE '%Missing columns%89467d35-77c2-4f82-ae7a-f093ff40f4cd%' +; diff --git a/tests/queries/0_stateless/02871_peak_threads_usage.reference b/tests/queries/0_stateless/02871_peak_threads_usage.reference new file mode 100644 index 00000000000..d772a2c4b4e --- /dev/null +++ b/tests/queries/0_stateless/02871_peak_threads_usage.reference @@ -0,0 +1,14 @@ +1 2 1 1 +2 2 1 1 +3 2 1 1 +4 2 1 1 +5 4 1 1 +6 6 1 1 +7 2 1 1 +8 2 1 1 +9 2 1 1 +10 6 1 1 +11 6 1 1 +12 6 1 1 +13 2 1 1 +14 2 1 1 diff --git a/tests/queries/0_stateless/02871_peak_threads_usage.sh b/tests/queries/0_stateless/02871_peak_threads_usage.sh new file mode 100755 index 00000000000..dfb3e665020 --- /dev/null +++ b/tests/queries/0_stateless/02871_peak_threads_usage.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +# Tags: no-parallel +# Tag no-parallel: Avoid using threads in other parallel queries. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +QUERY_OPTIONS=( + "--log_query_threads=1" + "--log_queries_min_type=QUERY_FINISH" + "--log_queries=1" + "--format=Null" + "--use_concurrency_control=0" +) + +UNIQUE_QUERY_ID="02871_1_$$" + +# TCPHandler and QueryPullPipeEx threads are always part of the query thread group, but those threads are not within the max_threads limit. 
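+# As a consequence, even a bare 'SELECT 1' is expected to report peak_threads_usage = 2 (see the
+# reference file), and raising max_threads should only increase the numbers for queries that can
+# actually parallelize.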
+${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_1" --query='SELECT 1' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_2" --query='SELECT 1 SETTINGS max_threads = 1' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_3" --query='SELECT 1 SETTINGS max_threads = 8' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_4" --query='SELECT * FROM numbers_mt(500000) SETTINGS max_threads = 1' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_5" --query='SELECT * FROM numbers_mt(500000) SETTINGS max_threads = 2' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_6" --query='SELECT * FROM numbers_mt(500000) SETTINGS max_threads = 4' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_7" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 1, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_8" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 4, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -mn --query=""" +SELECT count() FROM + (SELECT number FROM numbers_mt(1,100000) + UNION ALL SELECT number FROM numbers_mt(10000, 200000) + UNION ALL SELECT number FROM numbers_mt(30000, 40000) + UNION ALL SELECT number FROM numbers_mt(30000, 40000) + UNION ALL SELECT number FROM numbers_mt(300000, 400000) + UNION ALL SELECT number FROM numbers_mt(300000, 400000) + UNION ALL SELECT number FROM numbers_mt(300000, 4000000) + UNION ALL SELECT number FROM numbers_mt(300000, 4000000) + ) SETTINGS max_threads = 1""" "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -mn --query=""" +SELECT count() FROM + (SELECT number FROM numbers_mt(1,100000) + UNION ALL SELECT number FROM numbers_mt(10000, 2000) + UNION ALL SELECT number FROM numbers_mt(30000, 40000) + UNION ALL SELECT number FROM numbers_mt(30000, 40) + UNION ALL SELECT number FROM numbers_mt(300000, 400) + UNION ALL SELECT number FROM numbers_mt(300000, 4000) + UNION ALL SELECT number FROM numbers_mt(300000, 40000) + UNION ALL SELECT number FROM numbers_mt(300000, 4000000) + ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -mn --query=""" +SELECT count() FROM + (SELECT number FROM numbers_mt(1,100000) + UNION ALL SELECT number FROM numbers_mt(1, 1) + UNION ALL SELECT number FROM numbers_mt(1, 1) + UNION ALL SELECT number FROM numbers_mt(1, 1) + UNION ALL SELECT number FROM numbers_mt(1, 1) + UNION ALL SELECT number FROM numbers_mt(1, 1) + UNION ALL SELECT number FROM numbers_mt(1, 1) + UNION ALL SELECT number FROM numbers_mt(1, 4000000) + ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -mn --query=""" +SELECT sum(number) FROM numbers_mt(100000) +GROUP BY number % 2 +WITH TOTALS ORDER BY number % 2 +SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +for i in {1..14} +do + ${CLICKHOUSE_CLIENT} -mn --query=""" + SELECT '${i}', + peak_threads_usage, + 
(select count() from system.query_thread_log WHERE system.query_thread_log.query_id = '${UNIQUE_QUERY_ID}_${i}' AND current_database = currentDatabase()) = length(thread_ids), + length(thread_ids) >= peak_threads_usage + FROM system.query_log + WHERE type = 'QueryFinish' AND query_id = '${UNIQUE_QUERY_ID}_${i}' AND current_database = currentDatabase()" +done diff --git a/tests/queries/0_stateless/02872_gcd_codec.reference b/tests/queries/0_stateless/02872_gcd_codec.reference new file mode 100644 index 00000000000..1dd1b67e047 --- /dev/null +++ b/tests/queries/0_stateless/02872_gcd_codec.reference @@ -0,0 +1,1004 @@ +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 
+22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +1970-01-01 +1970-01-02 +1970-01-03 +1970-01-04 +1970-01-05 +1970-01-06 +1970-01-07 +1970-01-08 +1970-01-09 +1970-01-10 +1970-01-11 +1970-01-12 +1970-01-13 +1970-01-14 +1970-01-15 +1970-01-16 +1970-01-17 +1970-01-18 +1970-01-19 +1970-01-20 +1970-01-21 +1970-01-22 +1970-01-23 +1970-01-24 +1970-01-25 +1970-01-26 +1970-01-27 +1970-01-28 +1970-01-29 +1970-01-30 +1970-01-31 +1970-02-01 +1970-02-02 +1970-02-03 +1970-02-04 +1970-02-05 +1970-02-06 +1970-02-07 +1970-02-08 +1970-02-09 +1970-02-10 +1970-02-11 +1970-02-12 +1970-02-13 +1970-02-14 +1970-02-15 +1970-02-16 +1970-02-17 +1970-02-18 +1970-02-19 +1970-01-01 +1970-01-02 +1970-01-03 +1970-01-04 +1970-01-05 +1970-01-06 +1970-01-07 +1970-01-08 +1970-01-09 +1970-01-10 +1970-01-11 +1970-01-12 +1970-01-13 +1970-01-14 +1970-01-15 +1970-01-16 +1970-01-17 +1970-01-18 +1970-01-19 +1970-01-20 +1970-01-21 +1970-01-22 +1970-01-23 +1970-01-24 +1970-01-25 +1970-01-26 +1970-01-27 +1970-01-28 +1970-01-29 +1970-01-30 +1970-01-31 +1970-02-01 +1970-02-02 +1970-02-03 +1970-02-04 +1970-02-05 +1970-02-06 +1970-02-07 +1970-02-08 +1970-02-09 +1970-02-10 +1970-02-11 +1970-02-12 +1970-02-13 +1970-02-14 +1970-02-15 +1970-02-16 +1970-02-17 +1970-02-18 +1970-02-19 +1970-01-01 02:00:00 +1970-01-01 02:00:01 +1970-01-01 02:00:02 +1970-01-01 02:00:03 +1970-01-01 02:00:04 +1970-01-01 02:00:05 +1970-01-01 02:00:06 +1970-01-01 02:00:07 +1970-01-01 02:00:08 +1970-01-01 02:00:09 +1970-01-01 02:00:10 +1970-01-01 02:00:11 +1970-01-01 02:00:12 +1970-01-01 02:00:13 +1970-01-01 02:00:14 +1970-01-01 02:00:15 +1970-01-01 02:00:16 +1970-01-01 02:00:17 +1970-01-01 02:00:18 +1970-01-01 02:00:19 +1970-01-01 02:00:20 +1970-01-01 02:00:21 +1970-01-01 02:00:22 +1970-01-01 02:00:23 +1970-01-01 02:00:24 +1970-01-01 02:00:25 +1970-01-01 02:00:26 +1970-01-01 02:00:27 +1970-01-01 02:00:28 +1970-01-01 02:00:29 +1970-01-01 02:00:30 +1970-01-01 02:00:31 +1970-01-01 02:00:32 +1970-01-01 02:00:33 +1970-01-01 02:00:34 +1970-01-01 02:00:35 +1970-01-01 02:00:36 +1970-01-01 02:00:37 +1970-01-01 02:00:38 +1970-01-01 02:00:39 +1970-01-01 02:00:40 +1970-01-01 02:00:41 +1970-01-01 02:00:42 +1970-01-01 02:00:43 +1970-01-01 02:00:44 +1970-01-01 02:00:45 +1970-01-01 02:00:46 +1970-01-01 02:00:47 +1970-01-01 02:00:48 +1970-01-01 02:00:49 +1970-01-01 02:00:00.000 +1970-01-01 02:00:01.000 +1970-01-01 02:00:02.000 +1970-01-01 02:00:03.000 +1970-01-01 02:00:04.000 +1970-01-01 02:00:05.000 +1970-01-01 02:00:06.000 +1970-01-01 02:00:07.000 +1970-01-01 02:00:08.000 +1970-01-01 02:00:09.000 +1970-01-01 02:00:10.000 +1970-01-01 02:00:11.000 +1970-01-01 02:00:12.000 +1970-01-01 02:00:13.000 +1970-01-01 02:00:14.000 +1970-01-01 02:00:15.000 +1970-01-01 02:00:16.000 +1970-01-01 02:00:17.000 +1970-01-01 02:00:18.000 +1970-01-01 02:00:19.000 +1970-01-01 02:00:20.000 +1970-01-01 02:00:21.000 +1970-01-01 02:00:22.000 +1970-01-01 02:00:23.000 +1970-01-01 02:00:24.000 +1970-01-01 02:00:25.000 +1970-01-01 02:00:26.000 +1970-01-01 02:00:27.000 +1970-01-01 02:00:28.000 +1970-01-01 02:00:29.000 +1970-01-01 02:00:30.000 +1970-01-01 02:00:31.000 +1970-01-01 02:00:32.000 +1970-01-01 02:00:33.000 +1970-01-01 02:00:34.000 +1970-01-01 02:00:35.000 +1970-01-01 02:00:36.000 +1970-01-01 02:00:37.000 +1970-01-01 02:00:38.000 +1970-01-01 02:00:39.000 +1970-01-01 02:00:40.000 +1970-01-01 02:00:41.000 +1970-01-01 02:00:42.000 +1970-01-01 02:00:43.000 +1970-01-01 02:00:44.000 +1970-01-01 02:00:45.000 +1970-01-01 02:00:46.000 +1970-01-01 
02:00:47.000 +1970-01-01 02:00:48.000 +1970-01-01 02:00:49.000 +0 +0 +0 diff --git a/tests/queries/0_stateless/02872_gcd_codec.sql b/tests/queries/0_stateless/02872_gcd_codec.sql new file mode 100644 index 00000000000..245a1211052 --- /dev/null +++ b/tests/queries/0_stateless/02872_gcd_codec.sql @@ -0,0 +1,110 @@ +-- GCD codec can't be used stand-alone +CREATE TEMPORARY TABLE table_gcd_codec (n UInt64 CODEC(GCD)) ENGINE = Memory; -- { serverError BAD_ARGUMENTS } + +-- GCD codec rejects non-integer/decimal/datetime types +CREATE TEMPORARY TABLE table_gcd_codec (str String CODEC(GCD, LZ4)) ENGINE = Memory; -- { serverError BAD_ARGUMENTS } + +-- Basic random-based correctness test +CREATE TEMPORARY TABLE table_lz4 (id UInt64, ui UInt256 CODEC(LZ4)) ENGINE = Memory; +INSERT INTO table_lz4 SELECT * FROM generateRandom() LIMIT 50; + +CREATE TEMPORARY TABLE table_gcd (id UInt64, ui UInt256 CODEC(GCD, LZ4)) ENGINE = Memory; +INSERT INTO table_gcd SELECT * FROM table_lz4; + +SELECT COUNT(*) +FROM ( + SELECT table_lz4.id, table_lz4.ui AS ui1, table_gcd.id, table_gcd.ui AS ui2 + FROM table_lz4 JOIN table_gcd + ON table_lz4.id = table_gcd.id +) +WHERE ui1 != ui2; + +------------------------------------------------------------------------------------------- +-- Compression/decompression works for all data types supported by GCD codec + +-- Int* +CREATE TEMPORARY TABLE table_gcd_codec_uint8 (n UInt8 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_uint16 (n UInt16 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_uint32 (n UInt32 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_uint64 (n UInt64 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_uint128 (n UInt128 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_uint256 (n UInt256 CODEC(GCD, LZ4)) ENGINE = Memory; + +INSERT INTO table_gcd_codec_uint8 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_uint16 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_uint32 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_uint64 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_uint128 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_uint256 SELECT number FROM system.numbers LIMIT 50; + +SELECT * FROM table_gcd_codec_uint8; +SELECT * FROM table_gcd_codec_uint16; +SELECT * FROM table_gcd_codec_uint32; +SELECT * FROM table_gcd_codec_uint64; +SELECT * FROM table_gcd_codec_uint128; +SELECT * FROM table_gcd_codec_uint256; + +-- UInt* +CREATE TEMPORARY TABLE table_gcd_codec_int8 (n Int8 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_int16 (n Int16 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_int32 (n Int32 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_int64 (n Int64 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_int128 (n Int128 CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_int256 (n Int256 CODEC(GCD, LZ4)) ENGINE = Memory; + +INSERT INTO table_gcd_codec_int8 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_int16 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_int32 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_int64 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_int128 SELECT number FROM 
system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_int256 SELECT number FROM system.numbers LIMIT 50; + +SELECT * FROM table_gcd_codec_int8; +SELECT * FROM table_gcd_codec_int16; +SELECT * FROM table_gcd_codec_int32; +SELECT * FROM table_gcd_codec_int64; +SELECT * FROM table_gcd_codec_int128; +SELECT * FROM table_gcd_codec_int256; + +-- Decimal* +CREATE TEMPORARY TABLE table_gcd_codec_decimal32 (n Decimal32(1) CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_decimal64 (n Decimal64(1) CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_decimal128 (n Decimal128(1) CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_decimal256 (n Decimal256(1) CODEC(GCD, LZ4)) ENGINE = Memory; + +INSERT INTO table_gcd_codec_decimal32 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_decimal64 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_decimal128 SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_decimal256 SELECT number FROM system.numbers LIMIT 50; + +SELECT * FROM table_gcd_codec_decimal32; +SELECT * FROM table_gcd_codec_decimal64; +SELECT * FROM table_gcd_codec_decimal128; +SELECT * FROM table_gcd_codec_decimal256; + +-- Date[32] +CREATE TEMPORARY TABLE table_gcd_codec_date (n Date CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_date32 (n Date32 CODEC(GCD, LZ4)) ENGINE = Memory; + +INSERT INTO table_gcd_codec_date SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_date32 SELECT number FROM system.numbers LIMIT 50; + +SELECT * FROM table_gcd_codec_date; +SELECT * FROM table_gcd_codec_date32; + +-- DateTimeTime[64] +CREATE TEMPORARY TABLE table_gcd_codec_datetime (n DateTime('Asia/Istanbul') CODEC(GCD, LZ4)) ENGINE = Memory; +CREATE TEMPORARY TABLE table_gcd_codec_datetime64 (n DateTime64(3, 'Asia/Istanbul') CODEC(GCD, LZ4)) ENGINE = Memory; + +INSERT INTO table_gcd_codec_datetime SELECT number FROM system.numbers LIMIT 50; +INSERT INTO table_gcd_codec_datetime64 SELECT number FROM system.numbers LIMIT 50; + +SELECT * FROM table_gcd_codec_datetime; +SELECT * FROM table_gcd_codec_datetime64; + + +-- A column with all 0 values can be compressed/decompressed + +CREATE TEMPORARY TABLE table_gcd_codec_only_zero_values (n UInt8 CODEC(GCD, LZ4)) ENGINE = Memory; +INSERT INTO table_gcd_codec_only_zero_values VALUES (0), (0), (0); +SELECT * FROM table_gcd_codec_only_zero_values; diff --git a/tests/queries/0_stateless/02872_null_as_default_nested.reference b/tests/queries/0_stateless/02872_null_as_default_nested.reference new file mode 100644 index 00000000000..dfcaf696d38 --- /dev/null +++ b/tests/queries/0_stateless/02872_null_as_default_nested.reference @@ -0,0 +1,66 @@ +Native +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +Parquet +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) 
('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +ORC +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +Arrow +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +Avro +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +BSONEachRow +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +MsgPack +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +JSONEachRow +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} 
{1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +CSV +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +TSV +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) +Values +0 [0,0] [[[0,0],[0]]] ('hello',0,0) ('Hello',('Hello',(0,0)),0) {0:0} {0:{0:0}} ('Hello',[('Hello',{0:0},[0])],0) +42 [1,0] [[[1,0],[0]]] ('hello',0,1) ('Hello',('Hello',(1,0)),1) {1:0} {1:{1:0}} ('Hello',[('Hello',{1:0},[0])],1) +2 [2,2] [[[2,2],[0]]] ('hello',2,2) ('Hello',('Hello',(2,2)),2) {2:2} {2:{2:2}} ('Hello',[('Hello',{2:2},[2])],2) +42 [3,0] [[[3,0],[0]]] ('hello',0,3) ('Hello',('Hello',(3,0)),3) {3:0} {3:{3:0}} ('Hello',[('Hello',{3:0},[0])],3) +4 [4,4] [[[4,4],[0]]] ('hello',4,4) ('Hello',('Hello',(4,4)),4) {4:4} {4:{4:4}} ('Hello',[('Hello',{4:4},[4])],4) diff --git a/tests/queries/0_stateless/02872_null_as_default_nested.sh b/tests/queries/0_stateless/02872_null_as_default_nested.sh new file mode 100755 index 00000000000..8f91d573b89 --- /dev/null +++ b/tests/queries/0_stateless/02872_null_as_default_nested.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +for format in Native Parquet ORC Arrow Avro BSONEachRow MsgPack JSONEachRow CSV TSV Values +do + echo $format + $CLICKHOUSE_LOCAL -q "select number % 2 ? NULL : number as n, [number, number % 2 ? NULL : number] as arr1, [[[number, number % 2 ? NULL : number], [NULL]]] as arr2, tuple('hello', number % 2 ? NULL : number, number) as tup1, tuple('Hello', tuple('Hello', tuple(number, number % 2 ? NULL : number)), number) as tup2, map(number, number % 2 ? NULL : number) as map1, map(number, map(number, number % 2 ? null : number)) as map2, tuple('Hello', [tuple('Hello', map(number, number % 2 ? NULL : number), [number % 2 ? 
NULL : number])], number) as nested from numbers(5) format $format" > $CLICKHOUSE_TEST_UNIQUE_NAME.$format + $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.$format', auto, 'n UInt64 default 42, arr1 Array(UInt64), arr2 Array(Array(Array(UInt64))), tup1 Tuple(String, UInt64, UInt64), tup2 Tuple(String, Tuple(String, Tuple(UInt64, UInt64)), UInt64), map1 Map(UInt64, UInt64), map2 Map(UInt64, Map(UInt64, UInt64)), nested Tuple(String, Array(Tuple(String, Map(UInt64, UInt64), Array(UInt64))), UInt64)') settings input_format_null_as_default=1" + rm $CLICKHOUSE_TEST_UNIQUE_NAME.$format +done + diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.reference b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql new file mode 100644 index 00000000000..da76a5cb88f --- /dev/null +++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } + +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } + diff --git a/tests/queries/0_stateless/02874_analysis_of_variance_overflow.reference b/tests/queries/0_stateless/02874_analysis_of_variance_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02874_analysis_of_variance_overflow.sql b/tests/queries/0_stateless/02874_analysis_of_variance_overflow.sql new file mode 100644 index 00000000000..67fb4d28acb --- /dev/null +++ b/tests/queries/0_stateless/02874_analysis_of_variance_overflow.sql @@ -0,0 +1 @@ +SELECT analysisOfVariance(1, 18446744073709551615); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference new file mode 100644 index 00000000000..885332ab835 --- /dev/null +++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference @@ -0,0 +1,13 @@ +Invalid parameters +Const argument +693961 +713569 +668394 +713569 +\N +Non-const argument +713569 +713569 +MySQL alias +713569 +713569 diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql new file mode 100644 index 00000000000..2c35920e569 --- /dev/null +++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql @@ -0,0 +1,22 @@ +SELECT 'Invalid parameters'; +SELECT toDaysSinceYearZero(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toDaysSinceYearZero(toDate('2023-09-08'), toDate('2023-09-08')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toDaysSinceYearZero('str'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toDaysSinceYearZero(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toDaysSinceYearZero(toDateTime('2023-09-08 11:11:11')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Const argument'; +SELECT 
toDaysSinceYearZero(toDate('1970-01-01')); +SELECT toDaysSinceYearZero(toDate('2023-09-08')); +SELECT toDaysSinceYearZero(toDate32('1900-01-01')); +SELECT toDaysSinceYearZero(toDate32('2023-09-08')); +SELECT toDaysSinceYearZero(NULL); + +SELECT 'Non-const argument'; +SELECT toDaysSinceYearZero(materialize(toDate('2023-09-08'))); +SELECT toDaysSinceYearZero(materialize(toDate32('2023-09-08'))); + +SELECT 'MySQL alias'; +SELECT to_days(toDate('2023-09-08')); +SELECT TO_DAYS(toDate('2023-09-08')); diff --git a/tests/queries/0_stateless/02875_show_functions.reference b/tests/queries/0_stateless/02875_show_functions.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02875_show_functions.sh b/tests/queries/0_stateless/02875_show_functions.sh new file mode 100755 index 00000000000..6f8da63ca9e --- /dev/null +++ b/tests/queries/0_stateless/02875_show_functions.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +diff -q <($CLICKHOUSE_CLIENT -q "SELECT * from system.functions") \ + <($CLICKHOUSE_CLIENT -q "SHOW FUNCTIONS") + +diff -q <($CLICKHOUSE_CLIENT -q "SELECT * FROM system.functions WHERE name ILIKE 'quantile%'") \ + <($CLICKHOUSE_CLIENT -q "SHOW FUNCTIONS ILIKE 'quantile%'") + +diff -q <($CLICKHOUSE_CLIENT -q "SELECT * FROM system.functions WHERE name LIKE 'median%'") \ + <($CLICKHOUSE_CLIENT -q "SHOW FUNCTIONS LIKE 'median%'") diff --git a/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh new file mode 100755 index 00000000000..ce06ff530b9 --- /dev/null +++ b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo -e "a,b,c\n1,2,3" > $CLICKHOUSE_TEST_UNIQUE_NAME.csvwithnames + +$CLICKHOUSE_LOCAL -q "select b from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csvwithnames') settings input_format_with_names_use_header=0" + diff --git a/tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames new file mode 100644 index 00000000000..bfde6bfa0b8 --- /dev/null +++ b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames @@ -0,0 +1,2 @@ +a,b,c +1,2,3 diff --git a/tests/queries/0_stateless/02881_system_detached_parts_modification_time.reference b/tests/queries/0_stateless/02881_system_detached_parts_modification_time.reference new file mode 100644 index 00000000000..15d2c113fda --- /dev/null +++ b/tests/queries/0_stateless/02881_system_detached_parts_modification_time.reference @@ -0,0 +1,2 @@ +after detach 1 +after detach 1 diff --git a/tests/queries/0_stateless/02881_system_detached_parts_modification_time.sql.j2 b/tests/queries/0_stateless/02881_system_detached_parts_modification_time.sql.j2 new file mode 100644 index 00000000000..ded17b4e328 --- /dev/null +++ b/tests/queries/0_stateless/02881_system_detached_parts_modification_time.sql.j2 @@ -0,0 +1,16 @@ +set mutations_sync=1; + +{% for id, settings in [ + ("wide", "min_bytes_for_wide_part=0, min_rows_for_wide_part=0"), + ("compact", "min_bytes_for_wide_part=1000, min_rows_for_wide_part=100"), +] +%} + +drop table if exists data_{{ id }}; +create table data_{{ id }} (key Int) engine=MergeTree() order by tuple() settings {{ settings }}; +insert into data_{{ id }} values (1); +select 'before detach', now()-modification_time < 10 from system.detached_parts where database = currentDatabase() and table = 'data_{{ id }}'; +alter table data_{{ id }} detach partition all; +select 'after detach', now()-modification_time < 10 from system.detached_parts where database = currentDatabase() and table = 'data_{{ id }}'; + +{% endfor %} diff --git a/tests/queries/0_stateless/backups/with_broken_part.zip b/tests/queries/0_stateless/backups/with_broken_part.zip new file mode 100644 index 00000000000..c0ad218ad8e Binary files /dev/null and b/tests/queries/0_stateless/backups/with_broken_part.zip differ diff --git a/tests/queries/0_stateless/data_avro/union_one_type.avro b/tests/queries/0_stateless/data_avro/union_one_type.avro new file mode 100644 index 00000000000..07e6140e5e2 Binary files /dev/null and b/tests/queries/0_stateless/data_avro/union_one_type.avro differ diff --git a/tests/queries/0_stateless/data_parquet/nine_byte_decimals_from_spark.parquet b/tests/queries/0_stateless/data_parquet/nine_byte_decimals_from_spark.parquet new file mode 100644 index 00000000000..43fcd94e606 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/nine_byte_decimals_from_spark.parquet differ diff --git a/tests/queries/0_stateless/data_tsv/mock_data.tsv b/tests/queries/0_stateless/data_tsv/mock_data.tsv new file mode 100644 index 00000000000..fcf2b300b15 --- /dev/null +++ b/tests/queries/0_stateless/data_tsv/mock_data.tsv @@ -0,0 +1,5 @@ +UserName Age Tags +String Int8 Map(String, UInt64) +user127 20 {'test': 123} +user405 43 {'test': 123} +user902 43 {'test': 123} diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py new file mode 100644 index 00000000000..fdc4ab28e04 --- /dev/null +++ b/tests/queries/0_stateless/helpers/tcp_client.py @@ -0,0 
+1,313 @@ +import socket +import os +import uuid +import struct + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CLIENT_NAME = "simple native protocol" + + +def writeVarUInt(x, ba): + for _ in range(0, 9): + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, "utf-8") + writeVarUInt(len(s), ba) + ba.extend(b) + + +def serializeClientInfo(ba, query_id): + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary(CLIENT_NAME, ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def assertPacket(packet, expected): + assert packet == expected, "Got: {}, expected: {}".format(packet, expected) + + +class Data(object): + def __init__(self, key, value): + self.key = key + self.value = value + + +class TCPClient(object): + def __init__(self, timeout=30): + self.timeout = timeout + self.socket = None + + def __enter__(self): + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.settimeout(self.timeout) + self.socket.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + + self.sendHello() + self.receiveHello() + + return self + + def __exit__(self, exc_type, exc_value, traceback): + if self.socket: + self.socket.close() + + def readStrict(self, size=1): + res = bytearray() + while size: + cur = self.socket.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + def readUInt(self, size=1): + res = self.readStrict(size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + + def readUInt8(self): + return self.readUInt() + + def readUInt16(self): + return self.readUInt(2) + + def readUInt32(self): + return self.readUInt(4) + + def readUInt64(self): + return self.readUInt(8) + + def readFloat16(self): + return struct.unpack("e", self.readStrict(2)) + + def readFloat32(self): + return struct.unpack("f", self.readStrict(4)) + + def readFloat64(self): + return struct.unpack("d", self.readStrict(8)) + + def readVarUInt(self): + x = 0 + for i in range(9): + byte = self.readStrict()[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + def readStringBinary(self): + size = self.readVarUInt() + s = self.readStrict(size) + return s.decode("utf-8") + + def send(self, byte_array): + self.socket.sendall(byte_array) + + def sendHello(self): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary(CLIENT_NAME, ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd + self.send(ba) + + def 
receiveHello(self): + p_type = self.readVarUInt() + assert p_type == 0 # Hello + _server_name = self.readStringBinary() + _server_version_major = self.readVarUInt() + _server_version_minor = self.readVarUInt() + _server_revision = self.readVarUInt() + _server_timezone = self.readStringBinary() + _server_display_name = self.readStringBinary() + _server_version_patch = self.readVarUInt() + + def sendQuery(self, query, settings=None): + if settings is None: + settings = {} # No settings + + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + # Settings + for key, value in settings.items(): + writeStringBinary(key, ba) + writeVarUInt(1, ba) # is_important + writeStringBinary(str(value), ba) + writeStringBinary("", ba) # End of settings + + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + self.send(ba) + + def sendEmptyBlock(self): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + self.send(ba) + + def readException(self): + code = self.readUInt32() + _name = self.readStringBinary() + text = self.readStringBinary() + self.readStringBinary() # trace + assertPacket(self.readUInt8(), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) + + def readPacketType(self): + packet_type = self.readVarUInt() + if packet_type == 2: # Exception + raise RuntimeError(self.readException()) + + return packet_type + + def readResponse(self): + packet_type = self.readPacketType() + if packet_type == 1: # Data + return None + if packet_type == 3: # Progress + return None + if packet_type == 5: # End stream + return None + + raise RuntimeError("Unexpected packet: {}".format(packet_type)) + + def readProgressData(self): + read_rows = self.readVarUInt() + read_bytes = self.readVarUInt() + total_rows_to_read = self.readVarUInt() + written_rows = self.readVarUInt() + written_bytes = self.readVarUInt() + + return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes + + def readProgress(self): + packet_type = self.readPacketType() + if packet_type == 5: # End stream + return None + assertPacket(packet_type, 3) # Progress + return self.readProgressData() + + def readHeaderInfo(self): + self.readStringBinary() # external table name + # BlockInfo + assertPacket(self.readVarUInt(), 1) # field number 1 + assertPacket(self.readUInt8(), 0) # is_overflows + assertPacket(self.readVarUInt(), 2) # field number 2 + assertPacket(self.readUInt32(), 4294967295) # bucket_num + assertPacket(self.readVarUInt(), 0) # 0 + columns = self.readVarUInt() # columns count + rows = self.readVarUInt() # rows count + + return columns, rows + + def readHeader(self): + packet_type = self.readPacketType() + assertPacket(packet_type, 1) # Data + + columns, rows = self.readHeaderInfo() + print("Rows {} Columns {}".format(rows, columns)) + for _ in range(columns): + col_name = self.readStringBinary() + type_name = self.readStringBinary() + print("Column {} type {}".format(col_name, type_name)) + + def readRow(self, row_type, rows): + supported_row_types = { + "UInt8": self.readUInt8, + "UInt16": self.readUInt16, + "UInt32": self.readUInt32, + "UInt64": self.readUInt64, + "Float16": self.readFloat16, + "Float32": self.readFloat32, +
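# A minimal usage sketch for the TCPClient helper defined in this file, assuming a
# stateless test drives it over the native protocol the way the methods above allow;
# the query text and variable names here are illustrative only:
#
#     with TCPClient() as client:
#         client.sendQuery("SELECT number FROM system.numbers LIMIT 3")
#         client.sendEmptyBlock()                  # no external tables to send
#         client.readHeader()                      # consumes the header block, prints columns/types
#         data = client.readDataWithoutProgress()  # list of Data(column name, list of values)
#
# readDataWithoutProgress() skips Progress packets and returns None once the
# end-of-stream packet arrives.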
"Float64": self.readFloat64, + } + if row_type in supported_row_types: + read_type = supported_row_types[row_type] + row = [read_type() for _ in range(rows)] + return row + else: + raise RuntimeError( + "Current python version of tcp client doesn't support the following type of row: {}".format( + row_type + ) + ) + + def readDataWithoutProgress(self, need_print_info=True): + packet_type = self.readPacketType() + while packet_type == 3: # Progress + self.readProgressData() + packet_type = self.readPacketType() + + if packet_type == 5: # End stream + return None + assertPacket(packet_type, 1) # Data + + columns, rows = self.readHeaderInfo() + data = [] + if need_print_info: + print("Rows {} Columns {}".format(rows, columns)) + + for _ in range(columns): + col_name = self.readStringBinary() + type_name = self.readStringBinary() + if need_print_info: + print("Column {} type {}".format(col_name, type_name)) + + data.append(Data(col_name, self.readRow(type_name, rows))) + + return data diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index 7d02f9f1b41..7a27200c523 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -40,3 +40,5 @@ function wait_for_all_mutations() done } + +# vi: ft=bash diff --git a/tests/queries/0_stateless/parts.lib b/tests/queries/0_stateless/parts.lib index 7aec10392f0..fe365a70ca5 100644 --- a/tests/queries/0_stateless/parts.lib +++ b/tests/queries/0_stateless/parts.lib @@ -40,3 +40,5 @@ function wait_for_delete_inactive_parts() export -f wait_for_delete_empty_parts export -f wait_for_delete_inactive_parts + +# vi: ft=bash diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 1805b56f8dc..e0e11990d83 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -37,7 +37,7 @@ function try_sync_replicas() pids[${i}]=$! 
i=$((i + 1)) done - for pid in ${pids[*]}; do + for pid in "${pids[@]}"; do wait $pid || (echo "Failed to sync some replicas" && exit 1) done echo "Replication did not hang: synced all replicas of $table_name_prefix" @@ -114,3 +114,5 @@ function check_replication_consistency() fi } + +# vi: ft=bash diff --git a/tests/queries/0_stateless/scripts_udf/function.xml b/tests/queries/0_stateless/scripts_udf/function.xml new file mode 100644 index 00000000000..69a0abb5cec --- /dev/null +++ b/tests/queries/0_stateless/scripts_udf/function.xml @@ -0,0 +1,9 @@ +<functions> + <function> + <type>executable</type> + <name>test_function</name> + <return_type>String</return_type> + <format>TabSeparated</format> + <command>udf.sh</command> + </function> +</functions> diff --git a/tests/queries/0_stateless/scripts_udf/udf.sh b/tests/queries/0_stateless/scripts_udf/udf.sh new file mode 100755 index 00000000000..add85833c3e --- /dev/null +++ b/tests/queries/0_stateless/scripts_udf/udf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +echo qwerty diff --git a/tests/queries/0_stateless/transactions.lib b/tests/queries/0_stateless/transactions.lib index 6305caa4db1..12345ac2799 100755 --- a/tests/queries/0_stateless/transactions.lib +++ b/tests/queries/0_stateless/transactions.lib @@ -76,3 +76,5 @@ function tx_sync() tx_wait "$tx_num" tx "$tx_num" "$query" } + +# vi: ft=bash diff --git a/tests/queries/1_stateful/00061_storage_buffer.sql b/tests/queries/1_stateful/00061_storage_buffer.sql index e1f67abda20..e3cda3de36d 100644 --- a/tests/queries/1_stateful/00061_storage_buffer.sql +++ b/tests/queries/1_stateful/00061_storage_buffer.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS test.hits_dst; DROP TABLE IF EXISTS test.hits_buffer; CREATE TABLE test.hits_dst AS test.hits; -CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 1, 10, 10000, 100000, 10000000, 100000000); +CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 600, 600, 1000000, 1000000, 100000000, 1000000000); INSERT INTO test.hits_buffer SELECT * FROM test.hits WHERE CounterID = 800784; SELECT count() FROM test.hits_buffer; diff --git a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql index d652b1bc559..424e6c2dfee 100644 --- a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql +++ b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql @@ -15,8 +15,8 @@ SELECT count() FROM test.hits WHERE EventDate IN (toDate('2014-03-18'), toDate(' SELECT count() FROM test.hits WHERE EventDate = concat('2014-0', '3-18'); DROP TABLE IF EXISTS test.hits_indexed_by_time; -CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Asia/Dubai')) ENGINE = MergeTree ORDER BY (EventDate, EventTime); -INSERT INTO test.hits_indexed_by_time SELECT EventDate, EventTime FROM test.hits; +CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Asia/Dubai')) ENGINE = MergeTree ORDER BY (EventDate, EventTime) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +INSERT INTO test.hits_indexed_by_time SELECT EventDate, EventTime FROM test.hits SETTINGS max_block_size = 65000; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = '2014-03-18 01:02:03'; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime < '2014-03-18 01:02:03'; diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.sql b/tests/queries/1_stateful/00162_mmap_compression_none.sql index 2178644214a..d2cbcea8aaa 100644 --- a/tests/queries/1_stateful/00162_mmap_compression_none.sql +++
b/tests/queries/1_stateful/00162_mmap_compression_none.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS hits_none; -CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO hits_none SELECT Title FROM test.hits; SET min_bytes_to_use_mmap_io = 1; diff --git a/tests/queries/1_stateful/00174_distinct_in_order.sql b/tests/queries/1_stateful/00174_distinct_in_order.sql index aac54d46181..301ff36dd42 100644 --- a/tests/queries/1_stateful/00174_distinct_in_order.sql +++ b/tests/queries/1_stateful/00174_distinct_in_order.sql @@ -4,9 +4,9 @@ drop table if exists distinct_in_order sync; drop table if exists ordinary_distinct sync; select '-- DISTINCT columns are the same as in ORDER BY'; -create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID, EventDate settings optimize_distinct_in_order=1; -create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct CounterID, EventDate from test.hits order by CounterID, EventDate settings optimize_distinct_in_order=0; select distinct * from distinct_in_order except select * from ordinary_distinct; @@ -14,9 +14,9 @@ drop table if exists distinct_in_order sync; drop table if exists ordinary_distinct sync; select '-- DISTINCT columns has prefix in ORDER BY columns'; -create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID settings optimize_distinct_in_order=1; -create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct CounterID, EventDate from test.hits order by CounterID settings optimize_distinct_in_order=0; select distinct * from distinct_in_order except select * from ordinary_distinct; diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index 2c531b064db..ce889b338d6 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -8,9 +8,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) check_replicas_read_in_order() { - # to check this we actually look for at least one log message from MergeTreeInOrderSelectProcessor. 
- # hopefully logger's names are a bit more stable than log messages itself - # # NOTE: lack of "current_database = '$CLICKHOUSE_DATABASE'" filter is made on purpose $CLICKHOUSE_CLIENT -nq " SYSTEM FLUSH LOGS; @@ -18,7 +15,7 @@ check_replicas_read_in_order() { SELECT COUNT() > 0 FROM system.text_log WHERE query_id IN (SELECT query_id FROM system.query_log WHERE query_id != '$1' AND initial_query_id = '$1' AND event_date >= yesterday()) - AND event_date >= yesterday() AND logger_name = 'MergeTreeInOrderSelectProcessor'" + AND event_date >= yesterday() AND message ILIKE '%Reading%ranges in order%'" } # replicas should use reading in order following initiator's decision to execute aggregation in order. diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index ef70c82aefc..12bc0002191 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -155,3 +155,23 @@ function random_str() local n=$1 && shift tr -cd '[:lower:]' < /dev/urandom | head -c"$n" } + +function query_with_retry +{ + local query="$1" && shift + + local retry=0 + until [ $retry -ge 5 ] + do + local result + result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)" + if [ "$?" == 0 ]; then + echo -n "$result" + return + else + retry=$((retry + 1)) + sleep 3 + fi + done + echo "Query '$query' failed with '$result'" +} diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 0033c29c41c..a49e8f5c62f 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -62,7 +62,8 @@ def default_clickhouse_odbc_conn_str(): return str( OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", - Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1", + Timeout="300", + Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1&create_index_ignore_unique=1", ) ) diff --git a/tests/sqllogic/runner.py b/tests/sqllogic/runner.py index 1cf4c19c649..5f4baf8e59b 100755 --- a/tests/sqllogic/runner.py +++ b/tests/sqllogic/runner.py @@ -186,10 +186,10 @@ def mode_check_statements(parser): out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") - complete_sqlite_dir = os.path.join(out_stages_dir, "complete-sqlite") + complete_sqlite_dir = os.path.join(out_stages_dir, "statements-sqlite") os.makedirs(complete_sqlite_dir, exist_ok=True) - reports["complete-sqlite"] = run_all_tests_in_parallel( + reports["statements-sqlite"] = run_all_tests_in_parallel( setup_kwargs=as_kwargs( engine=Engines.SQLITE, ), @@ -224,6 +224,64 @@ def mode_check_statements(parser): parser.set_defaults(func=calle) +def mode_check_complete(parser): + parser.add_argument("--input-dir", metavar="DIR", required=True) + parser.add_argument("--out-dir", metavar="DIR", required=True) + + def calle(args): + input_dir = os.path.realpath(args.input_dir) + out_dir = os.path.realpath(args.out_dir) + + if not os.path.exists(input_dir): + raise FileNotFoundError( + input_dir, f"check statements: no such file or directory {input_dir}" + ) + + if not os.path.isdir(input_dir): + raise NotADirectoryError( + input_dir, f"check statements:: not a dir {input_dir}" + ) + + reports = dict() + + out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") + + complete_sqlite_dir = os.path.join(out_stages_dir, "complete-sqlite") + 
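# A sketch of how the new complete-test mode is expected to be invoked, inferred from
# the argparse wiring added further below; the directories are illustrative and any
# other global runner.py flags are omitted:
#
#     python3 runner.py complete-test --input-dir self-test --out-dir /test_output/self-test
#
# The mode first runs the suite to completion against SQLite ("complete-sqlite") and then
# replays the produced test files against ClickHouse over ODBC in verify mode
# ("complete-clickhouse"), reporting both stages via statements_report().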
os.makedirs(complete_sqlite_dir, exist_ok=True) + + reports["complete-sqlite"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.SQLITE, + ), + runner_kwargs=as_kwargs( + verify_mode=False, + stop_at_statement_error=True, + ), + input_dir=input_dir, + output_dir=complete_sqlite_dir, + ) + + verify_clickhouse_dir = os.path.join(out_stages_dir, "complete-clickhouse") + os.makedirs(verify_clickhouse_dir, exist_ok=True) + + reports["complete-clickhouse"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.ODBC, + conn_str=default_clickhouse_odbc_conn_str(), + ), + runner_kwargs=as_kwargs( + verify_mode=True, + stop_at_statement_error=True, + ), + input_dir=complete_sqlite_dir, + output_dir=verify_clickhouse_dir, + ) + + statements_report(reports, out_dir, args.mode) + + parser.set_defaults(func=calle) + + def make_actual_report(reports): return {stage: report.get_map() for stage, report in reports.items()} @@ -399,16 +457,22 @@ def parse_args(): ) subparsers = parser.add_subparsers(dest="mode") + mode_check_complete( + subparsers.add_parser( + "complete-test", + help="Run all tests. Check that all statements and queries are passed", + ) + ) mode_check_statements( subparsers.add_parser( "statements-test", - help="Run all test. Check that all statements are passed", + help="Run all tests. Check that all statements are passed", ) ) mode_self_test( subparsers.add_parser( "self-test", - help="Run all test. Check that all statements are passed", + help="Run all tests. Check that all statements are passed", ) ) args = parser.parse_args() diff --git a/tests/sqllogic/self-test/canonic_report.json b/tests/sqllogic/self-test/canonic_report.json index 0cd1aa4b43b..09adc0e1c1d 100644 --- a/tests/sqllogic/self-test/canonic_report.json +++ b/tests/sqllogic/self-test/canonic_report.json @@ -1 +1 @@ -{"sqlite-complete": {"dbms_name": "sqlite", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 14, "fail": 4}, "total": {"success": 18, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 14, "fail": 4}, "total": {"success": 18, "fail": 4}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: no such column: c"}, "38": {"status": "success", "position": 38, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is 
expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "126": {"status": "success", "position": 126, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}}}}}, "sqlite-vs-sqlite": {"dbms_name": "sqlite", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 3}, "total": {"success": 19, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-sqlite", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 3}, "total": {"success": 19, "fail": 3}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "42": {"status": "success", 
"position": 42, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "135": {"status": "success", "position": 135, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}}}}}, "clickhouse-complete": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 19, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 19, "fail": 4}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", 
"request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "32": {"status": "success", "position": 32, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "110": {"status": "success", "position": 110, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "139": {"status": "success", "position": 139, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}, "clickhouse-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 20, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "output_dir": 
"/test_output/self-test/self-test-stages/clickhouse-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/clickhouse-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 20, "fail": 3}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. 
(UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}, "sqlite-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 17, "fail": 6}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 17, "fail": 6}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "error", "position": 28, "request_type": 
"query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "canonic and actual results have different exceptions, details: canonic: query execution failed with an exception, original is: no such column: c, actual: query execution failed with an exception, original is: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "error", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "error", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. 
column-index=0 type=0', 'HY106')"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}} +{"sqlite-complete": {"dbms_name": "sqlite", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 14, "fail": 5}, "total": {"success": 20, "fail": 5}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 14, "fail": 5}, "total": {"success": 20, "fail": 5}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: no such column: c"}, "38": {"status": "success", "position": 38, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": 
"success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "126": {"status": "success", "position": 126, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}, "145": {"status": "success", "position": 145, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "149": {"status": "success", "position": 149, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}, "152": {"status": "error", "position": 152, "request_type": "query", "request": "SELECT + col2 AS col5 FROM tab0 WHERE NOT ( col0 ) * - - col4 IS NULL", "reason": "Got non-integer result 'uxbns' for I type."}}}}}, "sqlite-vs-sqlite": {"dbms_name": "sqlite", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-sqlite", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "42": {"status": "success", "position": 42, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", 
"request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "135": {"status": "success", "position": 135, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}, "154": {"status": "success", "position": 154, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "158": {"status": "success", "position": 158, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}, "161": {"status": "error", "position": 161, "request_type": "query", "request": "SELECT + col2 AS col5 FROM tab0 WHERE NOT ( col0 ) * - - col4 IS NULL", "reason": "Got non-integer result 'uxbns' for I type."}}}}}, "clickhouse-complete": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: ('HY000', \"[HY000] HTTP 
status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "32": {"status": "success", "position": 32, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "110": {"status": "success", "position": 110, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "139": {"status": "success", "position": 139, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}, "145": {"status": "success", "position": 145, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "149": {"status": "success", "position": 149, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}}}}}, "clickhouse-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 22, "fail": 3}}, 
"input_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/clickhouse-complete/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 22, "fail": 3}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. 
(UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}, "154": {"status": "success", "position": 154, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "158": {"status": "success", "position": 158, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}}}}}, "sqlite-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 19, "fail": 6}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 19, "fail": 6}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO 
t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "error", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "canonic and actual results have different exceptions, details: canonic: query execution failed with an exception, original is: no such column: c, actual: query execution failed with an exception, original is: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "error", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. 
column-index=0 type=0', 'HY106')"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "error", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}, "154": {"status": "success", "position": 154, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "158": {"status": "success", "position": 158, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}}}}}} \ No newline at end of file diff --git a/tests/sqllogic/self-test/test.test b/tests/sqllogic/self-test/test.test index 85b27ed7d60..503153acef8 100644 --- a/tests/sqllogic/self-test/test.test +++ b/tests/sqllogic/self-test/test.test @@ -142,4 +142,13 @@ SELECT number+1 from system.numbers LIMIT 20 ---- 20 values hashing to 52c46dff81346ead02fcf6245c762b1a +# Debug how an incorrect result type is parsed +statement ok +CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT) +statement ok +INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl') + +skipif ClickHouse +query I rowsort label-20 +SELECT + col2 AS col5 FROM tab0 WHERE NOT ( col0 ) * - - col4 IS NULL diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py index 42adb83809f..f6ad955e7b0 100755 --- a/tests/sqllogic/test_parser.py +++ b/tests/sqllogic/test_parser.py @@ -9,7 +9,13 @@ from enum import Enum from hashlib import md5 from functools import reduce -from exceptions import Error, ProgramError, ErrorWithParent, DataResultDiffer +from exceptions import ( + Error, + ProgramError, + ErrorWithParent, + DataResultDiffer, + QueryExecutionError, +) logger = logging.getLogger("parser") @@ -480,6 +486,7 @@ class QueryResult: for row in rows: res_row = [] for c, t in zip(row, types): + logger.debug(f"Building row. c:{c} t:{t}") if c is None: res_row.append("NULL") continue @@ -490,7 +497,12 @@ class QueryResult: else: res_row.append(str(c)) elif t == "I": - res_row.append(str(int(c))) + try: + res_row.append(str(int(c))) + except ValueError as ex: + raise QueryExecutionError( + f"Got non-integer result '{c}' for I type."
+ ) from ex elif t == "R": res_row.append(f"{c:.3f}") diff --git a/tests/sqllogic/test_runner.py b/tests/sqllogic/test_runner.py index 3df38e7fce5..f9ed23566b4 100644 --- a/tests/sqllogic/test_runner.py +++ b/tests/sqllogic/test_runner.py @@ -361,7 +361,7 @@ class TestRunner: continue if block.get_block_type() == test_parser.BlockType.control: - clogger.debug("Skip control block", name_pos) + clogger.debug("Skip control block %s", name_pos) block.dump_to(out_stream) continue @@ -374,13 +374,14 @@ class TestRunner: continue request = block.get_request() - exec_res = execute_request(request, self.connection) if block.get_block_type() in self.skip_request_types: clogger.debug("Runtime skip block for %s", self.dbms_name) block.dump_to(out_stream) continue + exec_res = execute_request(request, self.connection) + if block.get_block_type() == test_parser.BlockType.statement: try: clogger.debug("this is statement") diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 47dd2fc9f2d..4e1184cc9a5 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -20,7 +20,6 @@ if (ENABLE_UTILS) add_subdirectory (zookeeper-cli) add_subdirectory (zookeeper-dump-tree) add_subdirectory (zookeeper-remove-by-list) - add_subdirectory (wikistat-loader) add_subdirectory (check-marks) add_subdirectory (checksum-for-compressed-block) add_subdirectory (check-mysql-binlog) diff --git a/utils/backup/print_backup_info.py b/utils/backup/print_backup_info.py new file mode 100755 index 00000000000..54e5c745a8c --- /dev/null +++ b/utils/backup/print_backup_info.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +""" +print_backup_info: Extract information about a backup from ".backup" file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Usage: print_backup_info <path to ".backup" metadata file> +""" +import sys +import os +import xml.etree.ElementTree as ET + + +def main(): + if len(sys.argv) != 2: + print(__doc__) + sys.exit(1) + backup_xml = sys.argv[1] + + if not os.path.isfile(backup_xml): + print("error: {} does not exist".format(backup_xml)) + sys.exit(1) + + # Parse the backup metadata (the ".backup" file is an XML document) + tree = ET.parse(backup_xml) + root = tree.getroot() + contents = root.find("contents") + + version_node = root.find("version") + version = int(version_node.text) if (version_node is not None) else None + + timestamp_node = root.find("timestamp") + timestamp = timestamp_node.text if (timestamp_node is not None) else None + + base_backup_node = root.find("base_backup") + base_backup = base_backup_node.text if (base_backup_node is not None) else None + + number_of_files = 0 + size_of_files = 0 + number_of_files_from_base_backup = 0 + size_of_files_from_base_backup = 0 + databases = set() + tables = {} + + for file in contents: + name = file.find("name").text + size = int(file.find("size").text) + + use_base_node = file.find("use_base") + use_base = (use_base_node.text == "true") if (use_base_node is not None) else False + + if use_base: + base_size_node = file.find("base_size") + base_size = int(base_size_node.text) if (base_size_node is not None) else size + else: + base_size = 0 + + data_file_node = file.find("data_file") + data_file = data_file_node.text if (data_file_node is not None) else name + + has_data_file = name == data_file + + if has_data_file: + if size > base_size: + number_of_files += 1 + size_of_files += size - base_size + if base_size > 0: + number_of_files_from_base_backup += 1 + size_of_files_from_base_backup += base_size + + table_name = extract_table_name_from_path(name) + if table_name: + if table_name not in
tables: + tables[table_name] = [0, 0, 0, 0] + if not name.endswith(".sql") and has_data_file: + table_info = tables[table_name] + if size > base_size: + table_info[0] += 1 + table_info[1] += size - base_size + if base_size > 0: + table_info[2] += 1 + table_info[3] += base_size + tables[table_name] = table_info + + database_name = extract_database_name_from_path(name) + if database_name: + databases.add(database_name) + + size_of_backup = size_of_files + os.path.getsize(backup_xml) + + print(f"version={version}") + print(f"timestamp={timestamp}") + print(f"base_backup={base_backup}") + print(f"size_of_backup={size_of_backup}") + print(f"number_of_files={number_of_files}") + print(f"size_of_files={size_of_files}") + print(f"number_of_files_from_base_backup={number_of_files_from_base_backup}") + print(f"size_of_files_from_base_backup={size_of_files_from_base_backup}") + print(f"number_of_databases={len(databases)}") + print(f"number_of_tables={len(tables)}") + + print() + + print(f"{len(databases)} database(s):") + for database_name in sorted(databases): + print(database_name) + + print() + + print(f"{len(tables)} table(s):") + table_info_format = "{:>70} | {:>20} | {:>20} | {:>26} | {:>30}" + table_info_separator_line = ( + "{:->70}-+-{:->20}-+-{:->20}-+-{:->26}-+-{:->30}".format("", "", "", "", "") + ) + table_info_title_line = table_info_format.format( + "table name", + "num_files", + "size_of_files", + "num_files_from_base_backup", + "size_of_files_from_base_backup", + ) + print(table_info_title_line) + print(table_info_separator_line) + for table_name in sorted(tables): + table_info = tables[table_name] + print( + table_info_format.format( + table_name, table_info[0], table_info[1], table_info[2], table_info[3] + ) + ) + + +# Extracts a table name from a path inside a backup. +# For example, extracts 'default.tbl' from 'shards/1/replicas/1/data/default/tbl/all_0_0_0/data.bin'. +def extract_table_name_from_path(path): + path = strip_shards_replicas_from_path(path) + if not path: + return None + if path.startswith("metadata/"): + path = path[len("metadata/") :] + sep = path.find("/") + if sep == -1: + return None + database_name = path[:sep] + path = path[sep + 1 :] + sep = path.find(".sql") + if sep == -1: + return None + table_name = path[:sep] + return database_name + "." + table_name + if path.startswith("data/"): + path = path[len("data/") :] + sep = path.find("/") + if sep == -1: + return None + database_name = path[:sep] + path = path[sep + 1 :] + sep = path.find("/") + if sep == -1: + return None + table_name = path[:sep] + return database_name + "." + table_name + return None + + +# Extracts a database name from a path inside a backup. +# For example, extracts 'default' from 'shards/1/replicas/1/data/default/tbl/all_0_0_0/data.bin'. +def extract_database_name_from_path(path): + path = strip_shards_replicas_from_path(path) + if not path: + return None + if path.startswith("metadata/"): + path = path[len("metadata/") :] + sep = path.find(".sql") + if sep == -1 or path.find("/") != -1: + return None + return path[:sep] + if path.startswith("data/"): + path = path[len("data/") :] + sep = path.find("/") + if sep == -1: + return None + return path[:sep] + return None + + +# Removes a prefix "shards/<number>/replicas/<number>/" from a path.
+def strip_shards_replicas_from_path(path): + if path.startswith("shards"): + sep = path.find("/") + if sep == -1: + return None + sep = path.find("/", sep + 1) + if sep == -1: + return None + path = path[sep + 1 :] + if path.startswith("replicas"): + sep = path.find("/") + if sep == -1: + return None + sep = path.find("/", sep + 1) + if sep == -1: + return None + path = path[sep + 1 :] + return path + + +if __name__ == "__main__": + main() diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 65271830555..0b6d97998c1 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -41,6 +41,7 @@ AsynchronousMetricsUpdateInterval AsynchronousReadWait Authenticator Authenticators +AutoFDO AutoML Autocompletion AvroConfluent @@ -98,6 +99,7 @@ BlockWriteOps BlockWriteTime Bool BrokenDistributedFilesToInsert +Bugfix BuildID BuilderBinAarch BuilderBinAmd @@ -146,6 +148,7 @@ ChannelID Cidr Ciphertext CityHash +ClickBench ClickCat ClickHouse ClickHouse's @@ -312,6 +315,7 @@ Greenwald HDDs HHMM HMAC +HNSW HSTS HTTPConnection HTTPThreads @@ -694,10 +698,13 @@ Promtail Protobuf ProtobufSingle ProxySQL +PyArrow PyCharm QEMU QTCreator Quantile +QueryCacheBytes +QueryCacheEntries QueryCacheHits QueryCacheMisses QueryPreempted @@ -760,9 +767,9 @@ RoaringBitmap RocksDB Rollup RowBinary +RowBinaryWithDefaults RowBinaryWithNames RowBinaryWithNamesAndTypes -RowBinaryWithDefaults Runtime SATA SELECTs @@ -775,7 +782,6 @@ SMALLINT SPNEGO SQEs SQLAlchemy -SquaredDistance SQLConsoleDetail SQLInsert SQLSTATE @@ -810,6 +816,7 @@ Smirnov'test Soundex SpanKind Spearman's +SquaredDistance StartTLS StartTime StartupSystemTables @@ -916,6 +923,7 @@ URL's URLHash URLHierarchy URLPathHierarchy +USearch UUIDNumToString UUIDStringToNum UUIDs @@ -1058,6 +1066,10 @@ arrayReverse arrayReverseFill arrayReverseSort arrayReverseSplit +arrayRotateLeft +arrayRotateRight +arrayShiftLeft +arrayShiftRight arraySlice arraySort arraySplit @@ -1081,6 +1093,7 @@ authenticators autocompletion autodetect autodetected +autogenerate autogenerated autogeneration autostart @@ -1090,6 +1103,7 @@ avro avx aws azureBlobStorage +azureBlobStorageCluster backend backoff backticks @@ -1155,6 +1169,7 @@ brotli bson bsoneachrow buffersize +bugfix buildId buildable builtins @@ -1269,6 +1284,7 @@ cryptographic csv csvwithnames csvwithnamesandtypes +curdate currentDatabase currentProfiles currentRoles @@ -1313,6 +1329,7 @@ ddl deallocation deallocations debian +decodeHTMLComponent decodeURLComponent decodeURLFormComponent decodeXMLComponent @@ -1328,17 +1345,19 @@ defaultProfiles defaultRoles defaultValueOfArgumentType defaultValueOfTypeName -deltaLake -deltaSum -deltaSumTimestamp +delim deltalake +deltaLake deltasum +deltaSum deltasumtimestamp +deltaSumTimestamp demangle denormalize denormalized denormalizing denormals +dequeued deserialization deserialized deserializing @@ -1394,6 +1413,7 @@ encodings encryptions endian endsWith +endsWithUTF enum enum's enums @@ -1422,6 +1442,7 @@ farmFingerprint farmHash fastops fcoverage +fifo filesystem filesystemAvailable filesystemCapacity @@ -1453,6 +1474,7 @@ formatter freezed fromModifiedJulianDay fromModifiedJulianDayOrNull +fromUTCTimestamp fromUnixTimestamp fromUnixTimestampInJodaSyntax fsync @@ -1538,14 +1560,15 @@ gzipped hadoop halfMD halfday +hardlink hardlinks +hasAll +hasAny +hasColumnInTable hasSubsequence hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF 
hasSubsequenceUTF -hasAll -hasAny -hasColumnInTable hasSubstr hasToken hasTokenCaseInsensitive @@ -1587,10 +1610,11 @@ incrementing indexHint indexOf infi -initialQueryID -initializeAggregation +inflight initcap initcapUTF +initialQueryID +initializeAggregation injective innogames inodes @@ -1617,6 +1641,7 @@ isNotNull isNull isValidJSON isValidUTF +isZeroOrNull iteratively jaccard javaHash @@ -1881,7 +1906,6 @@ overfitting packetpool packetsize pageviews -pandahouse parallelization parallelize parallelized @@ -1978,6 +2002,7 @@ privateKeyPassphraseHandler prlimit procfs profiler +proleptic prometheus proto protobuf @@ -2128,9 +2153,9 @@ routineley rowNumberInAllBlocks rowNumberInBlock rowbinary +rowbinarywithdefaults rowbinarywithnames rowbinarywithnamesandtypes -rowbinarywithdefaults rsync rsyslog runnable @@ -2182,8 +2207,8 @@ sleepEachRow snowflakeToDateTime socketcache soundex -sparkbar sparkBar +sparkbar sparsehash speedscope splitByChar @@ -2200,6 +2225,7 @@ src stacktrace stacktraces startsWith +startsWithUTF statbox stateful stddev @@ -2224,6 +2250,8 @@ strtoll strtoull struct structs +structureToCapnProtoSchema +structureToProtobufSchema studentTTest studentttest subBitmap @@ -2236,6 +2264,7 @@ subdirectory subexpression subexpressions subfolder +subfolders subinterval subintervals subkey @@ -2253,6 +2282,8 @@ subreddits subseconds subsequence substring +substringIndex +substringIndexUTF substringUTF substrings subtitiles @@ -2328,6 +2359,7 @@ toDateTimeOrZero toDayOfMonth toDayOfWeek toDayOfYear +toDaysSinceYearZero toDecimal toDecimalString toFixedString @@ -2378,6 +2410,7 @@ toTimeZone toType toTypeName toUInt +toUTCTimestamp toUUID toUUIDOrDefault toUUIDOrNull @@ -2418,6 +2451,7 @@ tsv tui tumbleEnd tumbleStart +tupleConcat tupleDivide tupleDivideByNumber tupleElement @@ -2482,6 +2516,7 @@ uring url urlCluster urls +usearch userspace userver utils @@ -2508,6 +2543,7 @@ visitParamExtractRaw visitParamExtractString visitParamExtractUInt visitParamHas +vruntime wchc wchs webpage @@ -2542,6 +2578,7 @@ xlarge xml xxHash xz +yaml yandex youtube zLib @@ -2553,4 +2590,3 @@ znode znodes zookeeperSessionUptime zstd -curdate diff --git a/utils/check-style/check-include b/utils/check-style/check-include index 3c0c6103958..efa5121157d 100755 --- a/utils/check-style/check-include +++ b/utils/check-style/check-include @@ -51,7 +51,7 @@ inc="-I. \ -I./base/daemon \ -I./base/consistent-hashing \ -I./contrib/libhdfs3/include \ --I./contrib/base64/include \ +-I./contrib/aklomp-base64/include \ -I./contrib/protobuf/src \ -I./contrib/cppkafka/include \ -I./contrib/librdkafka-cmake/include \ diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 0b3b86b4772..b728602ef40 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -12,6 +12,7 @@ # (simple regexps) to check if the code is likely to have basic style violations. # and then to run formatter only for the specified files. +LC_ALL="en_US.UTF-8" ROOT_PATH=$(git rev-parse --show-toplevel) EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' @@ -410,3 +411,9 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep # The stateful directory should only contain the tests that depend on the test dataset (hits or visits). 
find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."' + +# Check for bad punctuation: whitespace before comma. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'" + +# Cyrillic characters hiding inside Latin. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place." diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 53b6163ba87..80c3535ef4e 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) +target_link_libraries(config-processor PRIVATE dbms) diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 49ce2068246..5514c34f4ef 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,4 +4,5 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log ch_contrib::rapidjson) +target_link_libraries(keeper-bench PRIVATE dbms) +target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8b535e3d897..598fe88bf21 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,14 @@ +v23.8.2.7-lts 2023-09-04 +v23.8.1.2992-lts 2023-09-01 +v23.7.5.30-stable 2023-08-28 +v23.7.4.5-stable 2023-08-08 +v23.7.3.14-stable 2023-08-05 +v23.7.2.25-stable 2023-08-03 +v23.7.1.2470-stable 2023-07-27 +v23.6.3.87-stable 2023-08-28 v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 +v23.5.5.92-stable 2023-08-28 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 v23.5.2.7-stable 2023-06-10 @@ -10,6 +19,11 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.13.6-lts 2023-09-05 +v23.3.12.11-lts 2023-09-04 +v23.3.11.5-lts 2023-08-28 +v23.3.10.5-lts 2023-08-23 +v23.3.9.55-lts 2023-08-21 v23.3.8.21-lts 2023-07-13 v23.3.7.5-lts 2023-06-29 v23.3.6.7-lts 2023-06-28 @@ -58,6 +72,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.21.38-lts 2023-08-28 v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 diff --git a/utils/prepare-time-trace/prepare-time-trace.sh b/utils/prepare-time-trace/prepare-time-trace.sh new file mode 100755 index 00000000000..7cacdec8c94 --- /dev/null +++ b/utils/prepare-time-trace/prepare-time-trace.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# This scripts transforms the output of clang's -ftime-trace JSON files into a format to upload to ClickHouse + +# Example: +# mkdir 
time_trace +# utils/prepare-time-trace/prepare-time-trace.sh build time_trace + +# See also https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview + +< \"${OUTPUT_DIR}/\$\$\" +" + +# Now you can upload it as follows: + +#cat "$OUTPUT_DIR"/* | clickhouse-client --progress --query "INSERT INTO build_time_trace (extra_column_names, file, library, time, pid, tid, ph, ts, dur, cat, name, detail, count, avgMs, args_name) FORMAT JSONCompactEachRow" diff --git a/utils/wikistat-loader/CMakeLists.txt b/utils/wikistat-loader/CMakeLists.txt deleted file mode 100644 index fc5416dea2e..00000000000 --- a/utils/wikistat-loader/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (wikistat-loader main.cpp ${SRCS}) -target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io boost::program_options) diff --git a/utils/wikistat-loader/main.cpp b/utils/wikistat-loader/main.cpp deleted file mode 100644 index 493f1df05da..00000000000 --- a/utils/wikistat-loader/main.cpp +++ /dev/null @@ -1,225 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - - -/** Reads uncompressed wikistat data from stdin, - * and writes transformed data in tsv format, - * ready to be loaded into ClickHouse. - * - * Input data has format: - * - * aa Wikipedia 1 17224 - * aa.b Main_Page 2 21163 - * - * project, optional subproject, path, hits, total size in bytes. - */ - - -template -static void readString(std::string & s, DB::ReadBuffer & buf) -{ - s.clear(); - - while (!buf.eof()) - { - const char * next_pos; - - if (break_at_dot) - next_pos = find_first_symbols<' ', '\n', '.'>(buf.position(), buf.buffer().end()); - else - next_pos = find_first_symbols<' ', '\n'>(buf.position(), buf.buffer().end()); - - s.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == ' ' || *buf.position() == '\n' || (break_at_dot && *buf.position() == '.')) - return; - } -} - - -/** Reads path before whitespace and decodes %xx sequences (to more compact and handy representation), - * except %2F '/', %26 '&', %3D '=', %3F '?', %23 '#' (to not break structure of URL). 
- */ -static void readPath(std::string & s, DB::ReadBuffer & buf) -{ - s.clear(); - - while (!buf.eof()) - { - const char * next_pos = find_first_symbols<' ', '\n', '%'>(buf.position(), buf.buffer().end()); - - s.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == ' ' || *buf.position() == '\n') - return; - - if (*buf.position() == '%') - { - ++buf.position(); - - char c1; - char c2; - - if (buf.eof() || *buf.position() == ' ') - break; - - DB::readChar(c1, buf); - - if (buf.eof() || *buf.position() == ' ') - break; - - DB::readChar(c2, buf); - - if ((c1 == '2' && (c2 == 'f' || c2 == '6' || c2 == '3' || c2 == 'F')) - || (c1 == '3' && (c2 == 'd' || c2 == 'f' || c2 == 'D' || c2 == 'F'))) - { - s += '%'; - s += c1; - s += c2; - } - else - s += static_cast(static_cast(unhex(c1)) * 16 + static_cast(unhex(c2))); - } - } -} - - -static void skipUntilNewline(DB::ReadBuffer & buf) -{ - while (!buf.eof()) - { - const char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end()); - - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == '\n') - { - ++buf.position(); - return; - } - } -} - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - } -} - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("time", boost::program_options::value()->required(), - "time of data in YYYY-MM-DD hh:mm:ss form") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Reads uncompressed wikistat data from stdin and writes transformed data in tsv format." << std::endl; - std::cout << "Usage: " << argv[0] << " --time='YYYY-MM-DD hh:00:00' < in > out" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - std::string time_str = options.at("time").as(); - LocalDateTime time(time_str); - LocalDate date(time_str); - - DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); - DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); - - std::string project; - std::string subproject; - std::string path; - UInt64 hits = 0; - UInt64 size = 0; - - size_t row_num = 0; - while (!in.eof()) - { - try - { - ++row_num; - readString(project, in); - - if (in.eof()) - break; - - if (*in.position() == '.') - readString(subproject, in); - else - subproject.clear(); - - DB::assertChar(' ', in); - readPath(path, in); - DB::assertChar(' ', in); - DB::readIntText(hits, in); - DB::assertChar(' ', in); - DB::readIntText(size, in); - DB::assertChar('\n', in); - } - catch (const DB::Exception & e) - { - /// Sometimes, input data has errors. For example, look at first lines in pagecounts-20130210-130000.gz - /// To save rest of data, just skip lines with errors. 
- if (e.code() == DB::ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED) - { - std::cerr << "At row " << row_num << ": " << DB::getCurrentExceptionMessage(false) << '\n'; - skipUntilNewline(in); - continue; - } - else - throw; - } - - DB::writeText(date, out); - DB::writeChar('\t', out); - DB::writeText(time, out); - DB::writeChar('\t', out); - DB::writeText(project, out); - DB::writeChar('\t', out); - DB::writeText(subproject, out); - DB::writeChar('\t', out); - DB::writeText(path, out); - DB::writeChar('\t', out); - DB::writeText(hits, out); - DB::writeChar('\t', out); - DB::writeText(size, out); - DB::writeChar('\n', out); - } - - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -}